Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,19 @@ PINECONE_API_KEY=changethis

OPENAI_API_KEY=changethis

NEXT_PUBLIC_BACKEND_BASE_URL=http://localhost:8000
NEXT_PUBLIC_BACKEND_BASE_URL=http://localhost:8000

NEXT_INTERNAL_BACKEND_BASE_URL=http://backend:8000

# Podcast storage configuration
# "local" stores files inside the backend container at /app/podcasts
# "s3" uploads files to an S3 bucket using the credentials below
PODCAST_STORAGE=local
PODCAST_LOCAL_DIR=/app/podcasts
AWS_ACCESS_KEY_ID=changethis
AWS_SECRET_ACCESS_KEY=changethis
AWS_REGION=changethis
S3_BUCKET_NAME=changethis
S3_PREFIX=podcasts/
PODCAST_TEACHER_VOICE=coral
PODCAST_STUDENT_VOICE=alloy
38 changes: 38 additions & 0 deletions backend/app/alembic/versions/2042a1f0c0a1_add_podcast_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""add podcast table

Revision ID: 2042a1f0c0a1
Revises: 2cde6f094a4e
Create Date: 2025-10-05 06:00:00.000000

"""
from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes


# revision identifiers, used by Alembic.
revision = '2042a1f0c0a1'
down_revision = '2cde6f094a4e'
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``podcast`` table.

    Every row references a course through ``course_id``; the foreign key is
    declared with ``ON DELETE CASCADE``.
    """
    auto_string = sqlmodel.sql.sqltypes.AutoString

    columns = [
        sa.Column('id', sa.Uuid(), nullable=False),
        sa.Column('course_id', sa.Uuid(), nullable=False),
        sa.Column('title', auto_string(length=255), nullable=False),
        sa.Column('transcript', sa.Text(), nullable=False),
        sa.Column('audio_path', auto_string(length=1024), nullable=False),
        sa.Column('storage_backend', auto_string(length=50), nullable=False),
        # The only nullable column.
        sa.Column('duration_seconds', sa.Float(), nullable=True),
        sa.Column(
            'created_at',
            sa.DateTime(timezone=True),
            server_default=sa.text('now()'),
            nullable=False,
        ),
        # NOTE(review): unlike created_at, updated_at is timezone-naive and has
        # no server default -- confirm this asymmetry is intended.
        sa.Column('updated_at', sa.DateTime(), nullable=False),
    ]
    constraints = [
        sa.ForeignKeyConstraint(['course_id'], ['course.id'], ondelete='CASCADE'),
        sa.PrimaryKeyConstraint('id'),
    ]

    op.create_table('podcast', *columns, *constraints)


def downgrade():
    """Revert this revision by dropping the ``podcast`` table."""
    table_name = 'podcast'
    op.drop_table(table_name)
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
"""merge heads: podcast + dev

Revision ID: a9b7c6d5e4f3
Revises: ('2042a1f0c0a1', '64343f21e9a8')
Create Date: 2025-10-06 00:00:00

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'a9b7c6d5e4f3'
down_revision = ('2042a1f0c0a1', '64343f21e9a8')
branch_labels = None
depends_on = None


def upgrade():
    """Merge point for the podcast and dev heads: intentionally a no-op."""
    return None


def downgrade():
    """Merge point for the podcast and dev heads: intentionally a no-op."""
    return None

2 changes: 2 additions & 0 deletions backend/app/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
courses,
documents,
items,
podcasts,
login,
private,
quiz_sessions,
Expand All @@ -21,6 +22,7 @@
api_router.include_router(courses.router)
api_router.include_router(chat.router)
api_router.include_router(documents.router)
api_router.include_router(podcasts.router)
api_router.include_router(quiz_sessions.router)

if settings.ENVIRONMENT == "local":
Expand Down
17 changes: 5 additions & 12 deletions backend/app/api/routes/courses.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,27 +171,20 @@ def delete_course(
return {"message": "Course deleted successfully"}


@router.get("/{id}/documents", response_model=list[dict[str, Any]])
@router.get("/{id}/documents", response_model=list[DocumentPublic])
async def list_documents(
id: str, session: SessionDep, skip: int = 0, limit: int = 100
) -> list[dict[str, Any]]:
) -> list[DocumentPublic]:
"""
List documents for a specific course.
"""
statement = (
select(Document).where(Document.course_id == id).offset(skip).limit(limit)
)
documents = session.exec(statement).all()
return [
{
"id": str(doc.id),
"filename": doc.filename,
"chunk_count": doc.chunk_count,
"status": doc.status,
"updated_at": doc.updated_at.isoformat(),
}
for doc in documents
]

# Use the public schema's from_attributes (ORM mode) to convert DB models
return [DocumentPublic.model_validate(doc) for doc in documents]


@router.get("/{id}/quizzes", response_model=QuizzesPublic)
Expand Down
21 changes: 15 additions & 6 deletions backend/app/api/routes/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile
import uuid
from asyncio.log import logger
import logging
from datetime import datetime, timezone
from typing import Any

Expand All @@ -21,7 +22,7 @@
from app.models.course import Course
from app.models.document import Document
from app.models.embeddings import Chunk
from app.schemas.public import DocumentStatus
from app.schemas.public import DocumentStatus, DocumentPublic
from app.tasks import generate_quizzes_task

router = APIRouter(prefix="/documents", tags=["documents"])
Expand All @@ -37,6 +38,7 @@
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

pc = Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV_NAME)
log = logging.getLogger(__name__)

task_status: dict[str, str] = {}

Expand All @@ -48,6 +50,12 @@ def ensure_index_exists():
if pc.has_index(index_name):
existing = pc.describe_index(index_name)
if existing.dimension != EXPECTED_DIMENSION:
log.warning(
"[DOCS] Index dimension mismatch | name=%s | have=%s want=%s — recreating",
index_name,
existing.dimension,
EXPECTED_DIMENSION,
)
pc.delete_index(index_name)
pc.create_index(
name=index_name,
Expand Down Expand Up @@ -165,6 +173,7 @@ async def process_pdf_task(file_path: str, document_id: uuid.UUID, session: Sess
"id": embedding_uuid,
"values": embedding,
"metadata": {
"course_id": str(document.course_id),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for fixing this. I was curious why it wasn't working.

"document_id": str(document_id),
"chunk_id": str(record.id),
"text": record.text_content,
Expand Down Expand Up @@ -290,8 +299,8 @@ async def process_multiple_documents(
return {"message": "Processing started for multiple files", "documents": results}


@router.get("/{id}", response_model=Document)
def read_document(session: SessionDep, current_user: CurrentUser, id: uuid.UUID) -> Any:
@router.get("/{id}", response_model=DocumentPublic)
def read_document(session: SessionDep, current_user: CurrentUser, id: uuid.UUID) -> DocumentPublic:
"""Get a document by its ID, ensuring the user has permissions."""
statement = (
select(Document)
Expand All @@ -308,7 +317,7 @@ def read_document(session: SessionDep, current_user: CurrentUser, id: uuid.UUID)
detail="Document not found or you do not have permission to access it.",
)

return document
return DocumentPublic.model_validate(document)


def delete_embeddings_task(document_id: uuid.UUID):
Expand All @@ -321,13 +330,13 @@ def delete_embeddings_task(document_id: uuid.UUID):
logger.error(f"Failed to delete embeddings for document {document_id}: {e}")


@router.delete("/{id}")
@router.delete("/{id}", response_model=Message)
def delete_document(
session: SessionDep,
current_user: CurrentUser,
id: uuid.UUID,
background_tasks: BackgroundTasks,
) -> Any:
) -> Message:
"""Delete a document by its ID, ensuring the user has permissions."""

document = session.exec(
Expand Down
140 changes: 140 additions & 0 deletions backend/app/api/routes/podcasts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
import os
import uuid
from typing import Any

from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse, StreamingResponse
from sqlalchemy.orm import selectinload
from sqlmodel import select

from app.api.deps import CurrentUser, SessionDep
from app.core.config import settings
from app.models.podcast import Podcast
from app.schemas.internal import GeneratePodcastRequest
from app.schemas.public import PodcastPublic, PodcastsPublic
from app.services.podcast_service import generate_podcast_for_course

router = APIRouter(prefix="/podcasts", tags=["podcasts"])


@router.get("/course/{course_id}", response_model=PodcastsPublic)
def list_podcasts(
    course_id: uuid.UUID,
    session: SessionDep,
    _current_user: CurrentUser,
    skip: int = 0,
    limit: int = 50,
) -> PodcastsPublic:
    """Return one page of a course's podcasts, newest first.

    Args:
        course_id: Course whose podcasts are listed.
        session: Database session.
        _current_user: Declared as a dependency; not read in the body.
        skip: Number of rows to skip.
        limit: Maximum number of rows to return.
    """
    query = (
        select(Podcast)
        .where(Podcast.course_id == course_id)
        .order_by(Podcast.created_at.desc())
        .offset(skip)
        .limit(limit)
    )
    rows = session.exec(query).all()
    return PodcastsPublic(data=[PodcastPublic.model_validate(row) for row in rows])



@router.post("/course/{course_id}/generate", response_model=PodcastPublic)
async def generate_podcast(
    course_id: uuid.UUID,
    session: SessionDep,
    _current_user: CurrentUser,
    body: GeneratePodcastRequest,
) -> PodcastPublic:
    """Generate a podcast for a course and return its public representation.

    Args:
        course_id: Course the podcast is generated for.
        session: Database session handed to the podcast service.
        _current_user: Declared as a dependency; not read in the body.
        body: Generation options (title, mode, topics, voices, document ids).
    """
    clean_title = body.title.strip()
    requested_mode = body.mode
    requested_topics = body.topics

    # Voices missing from the request fall back to the configured defaults;
    # the narrator falls back to the *teacher* default voice.
    voices = (
        body.teacher_voice or settings.PODCAST_TEACHER_VOICE,
        body.student_voice or settings.PODCAST_STUDENT_VOICE,
        body.narrator_voice or settings.PODCAST_TEACHER_VOICE,
    )

    created = await generate_podcast_for_course(
        session,
        course_id,
        clean_title,
        *voices,
        requested_mode,
        requested_topics,
        body.document_ids,
    )
    return PodcastPublic.model_validate(created)


@router.get("/{podcast_id}", response_model=PodcastPublic)
def get_podcast(
    podcast_id: uuid.UUID,
    session: SessionDep,
    _current_user: CurrentUser,
) -> PodcastPublic:
    """Fetch a single podcast by id.

    Raises:
        HTTPException: 404 when no podcast has the given id.
    """
    found = session.get(Podcast, podcast_id)
    if found:
        return PodcastPublic.model_validate(found)
    raise HTTPException(status_code=404, detail="Podcast not found")


@router.get("/{podcast_id}/audio")
def stream_audio(podcast_id: uuid.UUID, session: SessionDep, _current_user: CurrentUser):
    """Serve the audio of a podcast.

    Locally stored audio is streamed back as ``audio/mpeg`` in 8 KiB chunks.
    For any other storage backend the audio is treated as an S3 object and a
    JSON body ``{"url": ...}`` holding a presigned GET URL (valid for one
    hour) is returned so the client can download it directly.

    Args:
        podcast_id: Id of the podcast whose audio is requested.
        session: Database session.
        _current_user: Declared as a dependency; not read in the body.

    Raises:
        HTTPException: 404 when the podcast or its local audio file is
            missing; 500 when the presigned URL cannot be generated.
    """
    pod = session.get(Podcast, podcast_id)
    if not pod:
        raise HTTPException(status_code=404, detail="Podcast not found")

    if pod.storage_backend == "local":
        file_path = pod.audio_path
        # isfile() rather than exists(): a directory path would pass exists()
        # and then blow up inside the generator after the 200 headers are sent.
        if not os.path.isfile(file_path):
            raise HTTPException(status_code=404, detail="Audio file missing")

        def iterfile():
            with open(file_path, "rb") as f:
                while chunk := f.read(8192):
                    yield chunk

        return StreamingResponse(iterfile(), media_type="audio/mpeg")

    # For S3, return a presigned URL to let the client fetch directly.
    # NOTE(review): this path needs the AWS credentials to be provisioned on
    # the deployed backend service.
    try:
        import boto3

        bucket = settings.S3_BUCKET_NAME
        if not bucket:
            raise ValueError("S3 bucket not configured")

        s3 = boto3.client(
            "s3",
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
            region_name=settings.AWS_REGION,
        )
        # Strip only a *leading* "s3://<bucket>/"; str.replace() would also
        # remove any later occurrence and corrupt the object key.
        key = pod.audio_path.removeprefix(f"s3://{bucket}/")
        url = s3.generate_presigned_url(
            ClientMethod='get_object',
            Params={'Bucket': bucket, 'Key': key},
            ExpiresIn=3600,
        )
    except Exception as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=500, detail=f"Failed to generate S3 URL: {e}") from e
    return JSONResponse({"url": url})


def _delete_podcast_media(pod: Podcast) -> None:
    """Remove the audio behind *pod* from local disk or S3; may raise on failure."""
    if not pod.audio_path:
        return

    if pod.storage_backend == "local":
        try:
            os.remove(pod.audio_path)
        except FileNotFoundError:
            # Already gone: nothing to clean up.
            pass
    elif pod.storage_backend == "s3":
        bucket = settings.S3_BUCKET_NAME
        if not bucket:
            return
        import boto3

        # Strip only a *leading* "s3://<bucket>/"; str.replace() would also
        # remove any later occurrence and corrupt the object key.
        key = pod.audio_path.removeprefix(f"s3://{bucket}/")
        s3 = boto3.client(
            "s3",
            aws_access_key_id=settings.AWS_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_SECRET_ACCESS_KEY,
            region_name=settings.AWS_REGION,
        )
        s3.delete_object(Bucket=bucket, Key=key)


@router.delete("/{podcast_id}")
def delete_podcast(podcast_id: uuid.UUID, session: SessionDep, current_user: CurrentUser) -> dict[str, str]:
    """Delete a podcast row and, best-effort, its stored audio.

    Failing to remove the audio never blocks deletion of the database row,
    but it is logged so orphaned files/objects can be traced.

    Args:
        podcast_id: Id of the podcast to delete.
        session: Database session.
        current_user: Caller; must be a superuser or own the podcast's course.

    Returns:
        A confirmation message.

    Raises:
        HTTPException: 404 when the podcast does not exist; 403 when the
            caller is neither a superuser nor the owner of the course.
    """
    # Local import: this module has no module-level logger.
    import logging

    log = logging.getLogger(__name__)

    pod = session.exec(
        select(Podcast).where(Podcast.id == podcast_id).options(selectinload(Podcast.course))  # type: ignore
    ).first()

    if not pod:
        raise HTTPException(status_code=404, detail="Podcast not found")

    # Permission: owner or superuser.
    # NOTE(review): when the course relationship is missing, the ownership
    # check is skipped (same as before) -- confirm that is acceptable.
    course = getattr(pod, "course", None)
    if not current_user.is_superuser and course and course.owner_id != current_user.id:
        raise HTTPException(status_code=403, detail="Not enough permissions to delete this podcast")

    # Best-effort delete of underlying media: log and carry on.
    try:
        _delete_podcast_media(pod)
    except Exception:
        log.warning(
            "Failed to delete media for podcast %s (backend=%s)",
            podcast_id,
            pod.storage_backend,
            exc_info=True,
        )

    session.delete(pod)
    session.commit()
    return {"message": "Podcast deleted successfully"}
11 changes: 11 additions & 0 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,17 @@ def emails_enabled(self) -> bool:
FIRST_SUPERUSER: EmailStr
FIRST_SUPERUSER_PASSWORD: str

# Podcast/Audio storage settings
PODCAST_STORAGE: Literal["local", "s3"] = "local"
PODCAST_LOCAL_DIR: str = "/app/podcasts"
AWS_ACCESS_KEY_ID: str | None = None
AWS_SECRET_ACCESS_KEY: str | None = None
AWS_REGION: str | None = None
S3_BUCKET_NAME: str | None = None
S3_PREFIX: str = "podcasts/"
PODCAST_TEACHER_VOICE: str = "coral"
PODCAST_STUDENT_VOICE: str = "alloy"

def _check_default_secret(self, var_name: str, value: str | None) -> None:
if value == "changethis":
message = (
Expand Down
1 change: 1 addition & 0 deletions backend/app/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .chat import Chat # noqa: F401
from .podcast import Podcast # noqa: F401
from .common import * # noqa: F403, if you have base mixins here
from .course import Course # noqa: F401
from .document import Document # noqa: F401
Expand Down
7 changes: 7 additions & 0 deletions backend/app/models/course.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,13 @@ class Course(CourseBase, table=True):
sa_relationship_kwargs={"cascade": "all, delete-orphan"},
)
chats: list["Chat"] = Relationship(back_populates="course") # noqa: F821 # type: ignore
podcasts: list["Podcast"] = Relationship(
back_populates="course",
sa_relationship_kwargs={
"cascade": "all, delete-orphan",
"passive_deletes": True,
},
)

created_at: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc),
Expand Down
Loading