Monadical-SAS
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 5 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎compose.yml‎
Lines changed: 1 addition & 0 deletions b/‎compose.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎server/migrations/README‎
Lines changed: 3 additions & 1 deletion b/‎server/migrations/README‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎server/migrations/versions/0bc0f3ff0111_add_webvtt_field_to_transcript.py‎
Lines changed: 27 additions & 0 deletions b/‎server/migrations/versions/0bc0f3ff0111_add_webvtt_field_to_transcript.py‎
Lines changed: 27 additions & 0 deletions
diff --git a/‎server/migrations/versions/116b2f287eab_add_full_text_search.py‎
Lines changed: 47 additions & 0 deletions b/‎server/migrations/versions/116b2f287eab_add_full_text_search.py‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎server/migrations/versions/8120ebc75366_populate_webvtt_from_topics.py‎
Lines changed: 109 additions & 0 deletions b/‎server/migrations/versions/8120ebc75366_populate_webvtt_from_topics.py‎
Lines changed: 109 additions & 0 deletions
diff --git a/‎server/pyproject.toml‎
Lines changed: 8 additions & 0 deletions b/‎server/pyproject.toml‎
Lines changed: 8 additions & 0 deletions
@@ -14,3 +14,4 @@ data/
 www/REFACTOR.md
 www/reload-frontend
 server/test.sqlite
+CLAUDE.local.md
@@ -3,10 +3,10 @@
 repos:
   - repo: local
     hooks:
-      - id: yarn-format
-        name: run yarn format
+      - id: format
+        name: run format
         language: system
-        entry: bash -c 'cd www && yarn format'
+        entry: bash -c 'cd www && npx prettier --write .'
         pass_filenames: false
         files: ^www/
 
@@ -23,8 +23,7 @@ repos:
       - id: ruff
         args:
           - --fix
-          - --select
-          - I,F401
+          # Uses select rules from server/pyproject.toml
         files: ^server/
       - id: ruff-format
         files: ^server/
@@ -44,6 +44,7 @@ services:
     working_dir: /app
     volumes:
       - ./www:/app/
+      - /app/node_modules
     env_file:
       - ./www/.env.local
 
 
@@ -1 +1,3 @@
-Generic single-database configuration.
+Generic single-database configuration.
+
+Both data migrations and schema migrations must live in this migrations directory.
@@ -0,0 +1,27 @@
+"""add_webvtt_field_to_transcript
+
+Revision ID: 0bc0f3ff0111
+Revises: b7df9609542c
+Create Date: 2025-08-05 19:36:41.740957
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '0bc0f3ff0111'
+down_revision: Union[str, None] = 'b7df9609542c'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add a nullable ``webvtt`` text column to the ``transcript`` table."""
+    webvtt_column = sa.Column('webvtt', sa.Text(), nullable=True)
+    op.add_column('transcript', webvtt_column)
+
+
+def downgrade() -> None:
+    """Drop the ``webvtt`` column from the ``transcript`` table."""
+    op.drop_column(table_name='transcript', column_name='webvtt')
@@ -0,0 +1,47 @@
+"""add_full_text_search
+
+Revision ID: 116b2f287eab
+Revises: 0bc0f3ff0111
+Create Date: 2025-08-07 11:27:38.473517
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision: str = '116b2f287eab'
+down_revision: Union[str, None] = '0bc0f3ff0111'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Add a generated English full-text search column and its GIN index.
+
+    Runs only when the bound dialect is PostgreSQL; on any other dialect the
+    function returns without touching the schema.
+    """
+    is_postgres = op.get_bind().dialect.name == 'postgresql'
+    if not is_postgres:
+        return
+
+    # Stored generated column: title is weighted 'A', webvtt is weighted 'B'.
+    add_search_vector_sql = """
+        ALTER TABLE transcript ADD COLUMN search_vector_en tsvector 
+        GENERATED ALWAYS AS (
+            setweight(to_tsvector('english', coalesce(title, '')), 'A') ||
+            setweight(to_tsvector('english', coalesce(webvtt, '')), 'B')
+        ) STORED
+    """
+    op.execute(add_search_vector_sql)
+
+    op.create_index(
+        'idx_transcript_search_vector_en',
+        'transcript',
+        ['search_vector_en'],
+        postgresql_using='gin',
+    )
+
+
+def downgrade() -> None:
+    """Drop the search index and the generated column (PostgreSQL only)."""
+    if op.get_bind().dialect.name == 'postgresql':
+        # Index first, then the column it is built on.
+        op.drop_index('idx_transcript_search_vector_en', table_name='transcript')
+        op.drop_column('transcript', 'search_vector_en')
@@ -0,0 +1,109 @@
+"""populate_webvtt_from_topics
+
+Revision ID: 8120ebc75366
+Revises: 116b2f287eab
+Create Date: 2025-08-11 19:11:01.316947
+
+"""
+import json
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy import text
+
+
+# revision identifiers, used by Alembic.
+# down_revision chains this data migration after revision 116b2f287eab.
+revision: str = '8120ebc75366'
+down_revision: Union[str, None] = '116b2f287eab'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def topics_to_webvtt(topics):
+    """Convert a list of topic dicts into a WebVTT document string.
+
+    Each topic is read through ``.get("start")``, ``.get("end")`` and
+    ``.get("text")``. A topic becomes a cue only when both timestamps are
+    present and its stripped text is non-empty; any other topic is skipped.
+
+    Returns ``None`` when ``topics`` is empty or ``None``; otherwise the
+    document, which always starts with the ``WEBVTT`` header line.
+    """
+    if not topics:
+        return None
+
+    lines = ["WEBVTT", ""]
+
+    for topic in topics:
+        start_time = format_timestamp(topic.get("start"))
+        end_time = format_timestamp(topic.get("end"))
+        # ``or ""`` also covers a key that is present but null, which the
+        # default argument of ``get`` does not. The name ``cue_text`` avoids
+        # shadowing the ``text`` imported from SQLAlchemy.
+        cue_text = (topic.get("text") or "").strip()
+
+        if start_time and end_time and cue_text:
+            lines.append(f"{start_time} --> {end_time}")
+            lines.append(cue_text)
+            lines.append("")
+
+    return "\n".join(lines).strip()
+
+
+def format_timestamp(seconds):
+    """Format a duration in seconds as a WebVTT timestamp (HH:MM:SS.mmm).
+
+    Returns ``None`` when ``seconds`` is ``None``.
+    """
+    if seconds is None:
+        return None
+
+    # Round to whole milliseconds *before* splitting into fields. Formatting
+    # the fractional seconds directly could round e.g. 59.9996 up to "60.000"
+    # and produce an invalid timestamp such as "00:00:60.000".
+    total_ms = round(seconds * 1000)
+    total_secs, millis = divmod(total_ms, 1000)
+    total_minutes, secs = divmod(total_secs, 60)
+    hours, minutes = divmod(total_minutes, 60)
+
+    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"
+
+
+def upgrade() -> None:
+    """Populate the ``webvtt`` column for every transcript that has topics.
+
+    Rows whose topics cannot be parsed or converted are reported and skipped.
+    Database errors are deliberately not caught: after a failed statement the
+    enclosing transaction can no longer be relied on (PostgreSQL rejects every
+    later statement), so the migration must abort rather than print a
+    misleading summary.
+    """
+    connection = op.get_bind()
+
+    # Query all transcripts with topics
+    rows = connection.execute(
+        text("SELECT id, topics FROM transcript WHERE topics IS NOT NULL")
+    ).fetchall()
+    print(f"Found {len(rows)} transcripts with topics")
+
+    # Built once; reused for every row.
+    update_stmt = text("UPDATE transcript SET webvtt = :webvtt WHERE id = :id")
+
+    updated_count = 0
+    error_count = 0
+
+    for transcript_id, topics_data in rows:
+        if not topics_data:
+            continue
+
+        # Only the pure-Python parse/convert step is best-effort.
+        try:
+            # Parse JSON if it's a string
+            if isinstance(topics_data, str):
+                topics_data = json.loads(topics_data)
+
+            webvtt_content = topics_to_webvtt(topics_data)
+        except (ValueError, TypeError, AttributeError) as e:
+            error_count += 1
+            print(f"✗ Error updating transcript {transcript_id}: {e}")
+            continue
+
+        if not webvtt_content:
+            continue
+
+        connection.execute(
+            update_stmt,
+            {"webvtt": webvtt_content, "id": transcript_id},
+        )
+        updated_count += 1
+        print(f"✓ Updated transcript {transcript_id}")
+
+    print("\nMigration complete!")
+    print(f"  Updated: {updated_count}")
+    print(f"  Errors: {error_count}")
+
+
+def downgrade() -> None:
+    """Reset ``webvtt`` to NULL on every row of ``transcript``."""
+    clear_webvtt = text("UPDATE transcript SET webvtt = NULL")
+    op.execute(clear_webvtt)
@@ -40,6 +40,7 @@ dependencies = [
     "llama-index>=0.12.52",
     "llama-index-llms-openai-like>=0.4.0",
     "pytest-env>=1.1.5",
+    "webvtt-py>=0.5.0",
 ]
 
 [dependency-groups]
@@ -92,5 +93,12 @@ addopts = "-ra -q --disable-pytest-warnings --cov --cov-report html -v"
 testpaths = ["tests"]
 asyncio_mode = "auto"
 
+[tool.ruff.lint]
+select = [
+    "I",       # isort - import sorting
+    "F401",    # unused imports
+    "PLC0415", # import-outside-top-level - detect inline imports
+]
+
 [tool.ruff.lint.per-file-ignores]
 "reflector/processors/summary/summary_builder.py" = ["E501"]