diff --git a/backend/app/alembic/versions/040_add_db_comments.py b/backend/app/alembic/versions/040_add_db_comments.py new file mode 100644 index 00000000..12f5d717 --- /dev/null +++ b/backend/app/alembic/versions/040_add_db_comments.py @@ -0,0 +1,3253 @@ +"""add_db_comments + +Revision ID: 040 +Revises: 039 +Create Date: 2025-12-12 16:29:47.694694 + +""" +from alembic import op +import sqlalchemy as sa +import sqlmodel.sql.sqltypes +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "040" +down_revision = "039" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "apikey", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "user_id", + existing_type=sa.INTEGER(), + comment="Reference to the user for whom the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_prefix", + existing_type=sa.VARCHAR(), + comment="Unique prefix portion of the API key for identification", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_hash", + existing_type=sa.VARCHAR(), + comment="Bcrypt hash of the secret of the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + 
"updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the API key was last updated", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the API key was deleted", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the batch job", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('batch_job_id_seq'::regclass)"), + ) + op.alter_column( + "batch_job", + "provider", + existing_type=sa.VARCHAR(), + comment="LLM provider name (e.g., openai, anthropic)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "job_type", + existing_type=sa.VARCHAR(), + comment="Type of batch job (e.g., evaluation, classification, embedding)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Complete batch configuration including model, temperature, instructions, tools, etc.", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "batch_job", + "provider_batch_id", + existing_type=sa.VARCHAR(), + comment="Provider's batch job ID (e.g., OpenAI batch_id)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_file_id", + existing_type=sa.VARCHAR(), + comment="Provider's input file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_output_file_id", + existing_type=sa.VARCHAR(), + comment="Provider's output file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_status", + existing_type=sa.VARCHAR(), + comment="Provider-specific status (e.g., validating, in_progress, completed, failed)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "raw_output_url", + existing_type=sa.VARCHAR(), + comment="S3 URL of 
raw batch output file", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "total_items", + existing_type=sa.INTEGER(), + comment="Total number of items in the batch", + existing_nullable=False, + existing_server_default=sa.text("0"), + ) + op.alter_column( + "batch_job", + "error_message", + existing_type=sa.TEXT(), + comment="Error message if batch failed", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the batch job was started", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the batch job was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the collection", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_id", + existing_type=sa.VARCHAR(), + comment="External LLM service identifier (e.g., OpenAI vector store ID)", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_name", + existing_type=sa.VARCHAR(), + comment="Name of the LLM service provider", + existing_nullable=False, + ) + op.alter_column( + "collection", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "collection", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + 
comment="Timestamp when the collection was created", + existing_nullable=False, + ) + op.alter_column( + "collection", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the collection was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the collection was deleted", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the collection job", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESSFUL", "FAILED", name="collectionjobstatus" + ), + comment="Current job status (PENDING, PROCESSING, SUCCESSFUL, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "action_type", + existing_type=postgresql.ENUM("CREATE", "DELETE", name="collectionactiontype"), + comment="Type of operation (CREATE, DELETE)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "task_id", + existing_type=sa.VARCHAR(), + comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "trace_id", + existing_type=sa.VARCHAR(), + comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "error_message", + existing_type=sa.TEXT(), + comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "collection_id", + existing_type=sa.UUID(), + comment="Reference to the collection", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + 
comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "config", + "name", + existing_type=sa.VARCHAR(length=128), + comment="Configuration name", + existing_nullable=False, + ) + op.alter_column( + "config", + "description", + existing_type=sa.VARCHAR(length=512), + comment="Description of the configuration", + existing_nullable=True, + ) + op.alter_column( + "config", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the configuration", + existing_nullable=False, + ) + op.alter_column( + "config", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "config", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the configuration was created", + existing_nullable=False, + ) + op.alter_column( + "config", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the configuration was last updated", + existing_nullable=False, + ) + op.alter_column( + "config", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the configuration was deleted", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "config_blob", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Provider-specific configuration parameters (temperature, max_tokens, etc.)", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "commit_message", + existing_type=sa.VARCHAR(length=512), + comment="Optional message describing the changes in this version", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the configuration version", + existing_nullable=False, + ) + 
op.alter_column( + "config_version", + "config_id", + existing_type=sa.UUID(), + comment="Reference to the parent configuration", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "version", + existing_type=sa.INTEGER(), + comment="Version number starting at 1", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the version was created", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the version was last updated", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the version was soft-deleted", + existing_nullable=True, + ) + op.alter_column( + "credential", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if this credential is currently active and usable", + existing_nullable=False, + ) + op.alter_column( + "credential", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "credential", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "credential", + "id", + existing_type=sa.INTEGER(), + comment="Unique ID for the credential", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "credential", + "provider", + existing_type=sa.VARCHAR(), + comment="Provider name like 'openai', 'gemini'", + existing_nullable=False, + ) + op.alter_column( + "credential", + "credential", + existing_type=sa.VARCHAR(), + comment="Encrypted JSON string containing provider-specific API credentials", + existing_nullable=False, + ) + op.alter_column( + "credential", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when 
the credential was created", + existing_nullable=False, + ) + op.alter_column( + "credential", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the credential was last updated", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the transformation job", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "COMPLETED", "FAILED", name="transformationstatus" + ), + comment="Current status (PENDING, PROCESSING, COMPLETED, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "task_id", + existing_type=sa.VARCHAR(), + comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "trace_id", + existing_type=sa.VARCHAR(), + comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error message if transformation failed", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "source_document_id", + existing_type=sa.UUID(), + comment="Reference to the source document being transformed", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "transformed_document_id", + existing_type=sa.UUID(), + comment="Reference to the resulting transformed document", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + 
"document", + "fname", + existing_type=sa.VARCHAR(), + comment="Original filename of the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "document", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "object_store_url", + existing_type=sa.VARCHAR(), + comment="Cloud storage URL for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "document", + "source_document_id", + existing_type=sa.UUID(), + comment="Reference to source document if this is a transformation", + existing_nullable=True, + ) + op.alter_column( + "document", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the document was uploaded", + existing_nullable=False, + ) + op.alter_column( + "document", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the document was last updated", + existing_nullable=False, + ) + op.alter_column( + "document", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the document was deleted", + existing_nullable=True, + ) + op.alter_column( + "documentcollection", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the document-collection link", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "documentcollection", + "document_id", + existing_type=sa.UUID(), + comment="Reference to the document", + existing_nullable=False, + ) + op.alter_column( + "documentcollection", + "collection_id", + existing_type=sa.UUID(), + comment="Reference to the collection", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + 
"id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the dataset", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text( + "nextval('evaluation_dataset_id_seq'::regclass)" + ), + ) + op.alter_column( + "evaluation_dataset", + "name", + existing_type=sa.VARCHAR(), + comment="Name of the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "description", + existing_type=sa.VARCHAR(), + comment="Description of the dataset", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "dataset_metadata", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Dataset metadata (item counts, duplication factor, etc.)", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "evaluation_dataset", + "object_store_url", + existing_type=sa.VARCHAR(), + comment="S3 URL where the dataset CSV is stored", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "langfuse_dataset_id", + existing_type=sa.VARCHAR(), + comment="Langfuse dataset ID for observability integration", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation dataset was created", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation dataset was last updated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "id", + existing_type=sa.INTEGER(), + comment="Unique 
identifier for the evaluation run", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "evaluation_run", + "run_name", + existing_type=sa.VARCHAR(), + comment="Name of the evaluation run", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "dataset_name", + existing_type=sa.VARCHAR(), + comment="Name of the Langfuse dataset used", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Evaluation configuration (model, instructions, etc.)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "dataset_id", + existing_type=sa.INTEGER(), + comment="Reference to the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "batch_job_id", + existing_type=sa.INTEGER(), + comment="Reference to the batch job for responses", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "embedding_batch_job_id", + existing_type=sa.INTEGER(), + comment="Reference to the batch job for embedding similarity scoring", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "status", + existing_type=sa.VARCHAR(), + comment="Evaluation status (pending, processing, completed, failed)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "object_store_url", + existing_type=sa.VARCHAR(), + comment="S3 URL of processed evaluation results", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "total_items", + existing_type=sa.INTEGER(), + comment="Total number of items evaluated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "score", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment="Evaluation scores (correctness, cosine_similarity, etc.)", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "error_message", + existing_type=sa.TEXT(), + comment="Error message if evaluation 
failed", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "base_model", + existing_type=sa.VARCHAR(), + comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment="Train/test split ratio for the dataset", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "training_file_id", + existing_type=sa.VARCHAR(), + comment="OpenAI training file identifier", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "system_prompt", + existing_type=sa.TEXT(), + comment="System prompt used during fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the fine-tuning job", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "fine_tuning", + "provider_job_id", + existing_type=sa.VARCHAR(), + comment="Fine-tuning job ID returned by the provider", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="finetuningstatus" + ), + comment="Current status of the fine-tuning job", + existing_nullable=False, + existing_server_default=sa.text("'pending'::finetuningstatus"), + ) + op.alter_column( + "fine_tuning", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment="Name of the resulting fine-tuned model", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "train_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URI of the training data", + 
existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URI of the testing data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "document_id", + existing_type=sa.UUID(), + comment="Reference to the training document", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was deleted", + existing_nullable=True, + ) + op.alter_column( + "job", + "id", + existing_type=sa.UUID(), + comment="Unique identifier for the job", + existing_nullable=False, + ) + op.alter_column( + "job", + "task_id", + existing_type=sa.VARCHAR(), + comment="Celery task ID returned when job is queued", + existing_nullable=True, + ) + op.alter_column( + "job", + "trace_id", + existing_type=sa.VARCHAR(), + comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "job", + "error_message", + 
existing_type=sa.VARCHAR(), + comment="Error details if the job fails", + existing_nullable=True, + ) + op.alter_column( + "job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESS", "FAILED", name="jobstatus" + ), + comment="Current state of the job (PENDING, PROCESSING, SUCCESS, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "job", + "job_type", + existing_type=postgresql.ENUM("RESPONSE", "LLM_API", name="jobtype"), + comment="Type of job being executed (e.g., RESPONSE, LLM_API)", + existing_nullable=False, + ) + op.alter_column( + "job", + "created_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the evaluation", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "model_evaluation", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment="Name of the fine-tuned model being evaluated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URI of the testing data", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "base_model", + existing_type=sa.VARCHAR(), + comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment="Train/test split ratio used", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "system_prompt", + existing_type=sa.TEXT(), + comment="System prompt used during evaluation", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "score", + 
existing_type=postgresql.JSON(astext_type=sa.Text()), + comment="Evaluation scores per metric (e.g., MCC)", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "prediction_data_s3_object", + existing_type=sa.VARCHAR(), + comment="S3 URL where the prediction data is stored", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="modelevaluationstatus" + ), + comment="Current status of the evaluation", + existing_nullable=False, + existing_server_default=sa.text("'pending'::modelevaluationstatus"), + ) + op.alter_column( + "model_evaluation", + "error_message", + existing_type=sa.VARCHAR(), + comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "fine_tuning_id", + existing_type=sa.INTEGER(), + comment="Reference to the fine-tuning job", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "document_id", + existing_type=sa.UUID(), + comment="Reference to the evaluation document", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation was created", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation was last updated", + existing_nullable=False, + ) + 
op.alter_column( + "model_evaluation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the evaluation was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "assistant_id", + existing_type=sa.VARCHAR(length=255), + comment="Unique identifier for the assistant at OpenAI", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "name", + existing_type=sa.VARCHAR(length=255), + comment="Name of the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "instructions", + existing_type=sa.TEXT(), + comment="System instructions for the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "model", + existing_type=sa.VARCHAR(), + comment="OpenAI model used by the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "vector_store_ids", + existing_type=postgresql.ARRAY(sa.VARCHAR()), + comment="List of OpenAI vector store IDs attached", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "temperature", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment="Parameter that controls the creativity or randomness of the text generated by model", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "max_num_results", + existing_type=sa.INTEGER(), + comment="Parameter that controls maximum number of results to return", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the assistant", + existing_nullable=False, + autoincrement=True, + ) + 
op.alter_column( + "openai_assistant", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + existing_server_default=sa.text("false"), + ) + op.alter_column( + "openai_assistant", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the assistant was created", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the assistant was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the assistant was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "response_id", + existing_type=sa.VARCHAR(), + comment="OpenAI response identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "ancestor_response_id", + existing_type=sa.VARCHAR(), + comment="Root response ID for conversation threading", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "previous_response_id", + existing_type=sa.VARCHAR(), + comment="Previous response ID in the conversation chain", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "user_question", + existing_type=sa.VARCHAR(), + comment="User's question or input text", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "response", + existing_type=sa.VARCHAR(), + comment="Response generated by OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "model", + existing_type=sa.VARCHAR(), + comment="Model used to generate the response", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "assistant_id", + existing_type=sa.VARCHAR(), + comment="OpenAI assistant identifier if used", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + 
"project_id", + existing_type=sa.INTEGER(), + comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the conversation record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_conversation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the conversation was created", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the conversation was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the conversation was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "thread_id", + existing_type=sa.VARCHAR(), + comment="OpenAI thread identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "prompt", + existing_type=sa.VARCHAR(), + comment="User prompt sent to the thread", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "response", + existing_type=sa.VARCHAR(), + comment="Response received from OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "status", + existing_type=sa.VARCHAR(), + comment="Current status of the thread interaction", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "error", + existing_type=sa.VARCHAR(), + comment="Error message if the interaction failed", + existing_nullable=True, + ) + 
op.alter_column( + "openai_thread", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the thread record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_thread", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the record was created", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the record was last updated", + existing_nullable=False, + ) + op.alter_column( + "organization", + "name", + existing_type=sa.VARCHAR(length=255), + comment="Organization name (unique identifier)", + existing_nullable=False, + ) + op.alter_column( + "organization", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if the organization is active", + existing_nullable=False, + ) + op.alter_column( + "organization", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the organization", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('organization_id_seq'::regclass)"), + ) + op.alter_column( + "organization", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the organization was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "organization", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the organization was last updated", + existing_nullable=False, + ) + op.alter_column( + "project", + "name", + existing_type=sa.VARCHAR(length=255), + comment="Project name", + existing_nullable=False, + ) + op.alter_column( + "project", + "description", + existing_type=sa.VARCHAR(length=500), + comment="Project description", + existing_nullable=True, + ) + op.alter_column( + "project", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if the project is active", + existing_nullable=False, 
+ ) + op.alter_column( + "project", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the project", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('project_id_seq'::regclass)"), + ) + op.alter_column( + "project", + "storage_path", + existing_type=sa.UUID(), + comment="Unique UUID used for cloud storage path", + existing_nullable=False, + ) + op.alter_column( + "project", + "organization_id", + existing_type=sa.INTEGER(), + comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "project", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the project was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "project", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment="Timestamp when the project was last updated", + existing_nullable=False, + ) + op.alter_column( + "user", + "email", + existing_type=sa.VARCHAR(length=255), + comment="User's email address", + existing_nullable=False, + ) + op.alter_column( + "user", + "is_active", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if the user account is active", + existing_nullable=False, + ) + op.alter_column( + "user", + "is_superuser", + existing_type=sa.BOOLEAN(), + comment="Flag indicating if user has superuser privileges", + existing_nullable=False, + ) + op.alter_column( + "user", + "full_name", + existing_type=sa.VARCHAR(length=255), + comment="User's full name", + existing_nullable=True, + ) + op.alter_column( + "user", + "id", + existing_type=sa.INTEGER(), + comment="Unique identifier for the user", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), + ) + op.alter_column( + "user", + "hashed_password", + existing_type=sa.VARCHAR(), + comment="Bcrypt hash of the user's password", + existing_nullable=False, + ) + # ### end Alembic 
commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "user", + "hashed_password", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Bcrypt hash of the user's password", + existing_nullable=False, + ) + op.alter_column( + "user", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the user", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('user_id_seq'::regclass)"), + ) + op.alter_column( + "user", + "full_name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="User's full name", + existing_nullable=True, + ) + op.alter_column( + "user", + "is_superuser", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if user has superuser privileges", + existing_nullable=False, + ) + op.alter_column( + "user", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if the user account is active", + existing_nullable=False, + ) + op.alter_column( + "user", + "email", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="User's email address", + existing_nullable=False, + ) + op.alter_column( + "project", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the project was last updated", + existing_nullable=False, + ) + op.alter_column( + "project", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the project was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "project", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "project", + "storage_path", + existing_type=sa.UUID(), + comment=None, + 
existing_comment="Unique UUID used for cloud storage path", + existing_nullable=False, + ) + op.alter_column( + "project", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the project", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('project_id_seq'::regclass)"), + ) + op.alter_column( + "project", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if the project is active", + existing_nullable=False, + ) + op.alter_column( + "project", + "description", + existing_type=sa.VARCHAR(length=500), + comment=None, + existing_comment="Project description", + existing_nullable=True, + ) + op.alter_column( + "project", + "name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Project name", + existing_nullable=False, + ) + op.alter_column( + "organization", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the organization was last updated", + existing_nullable=False, + ) + op.alter_column( + "organization", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the organization was created", + existing_nullable=False, + existing_server_default=sa.text("now()"), + ) + op.alter_column( + "organization", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the organization", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('organization_id_seq'::regclass)"), + ) + op.alter_column( + "organization", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if the organization is active", + existing_nullable=False, + ) + op.alter_column( + "organization", + "name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Organization name (unique identifier)", + 
existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the record was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the record was created", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the thread record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_thread", + "error", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if the interaction failed", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "status", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Current status of the thread interaction", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "response", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Response received from OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_thread", + "prompt", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="User prompt sent to the thread", + existing_nullable=False, + ) + op.alter_column( + "openai_thread", + "thread_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI thread identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the conversation was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the conversation was last updated", + existing_nullable=False, + ) + 
op.alter_column( + "openai_conversation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the conversation was created", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the conversation record", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_conversation", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "assistant_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI assistant identifier if used", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Model used to generate the response", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "response", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Response generated by OpenAI", + existing_nullable=True, + ) + op.alter_column( + "openai_conversation", + "user_question", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="User's question or input text", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "previous_response_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Previous response ID in the conversation chain", + existing_nullable=True, + ) + op.alter_column( + 
"openai_conversation", + "ancestor_response_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Root response ID for conversation threading", + existing_nullable=False, + ) + op.alter_column( + "openai_conversation", + "response_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI response identifier", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the assistant was deleted", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the assistant was last updated", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the assistant was created", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + existing_server_default=sa.text("false"), + ) + op.alter_column( + "openai_assistant", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the assistant", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "openai_assistant", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "max_num_results", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Parameter that controls maximum number of results to return", + 
existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "temperature", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment="Parameter that controls the creativity or randomness of the text generated by model", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "vector_store_ids", + existing_type=postgresql.ARRAY(sa.VARCHAR()), + comment=None, + existing_comment="List of OpenAI vector store IDs attached", + existing_nullable=True, + ) + op.alter_column( + "openai_assistant", + "model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI model used by the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "instructions", + existing_type=sa.TEXT(), + comment=None, + existing_comment="System instructions for the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "name", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Name of the assistant", + existing_nullable=False, + ) + op.alter_column( + "openai_assistant", + "assistant_id", + existing_type=sa.VARCHAR(length=255), + comment=None, + existing_comment="Unique identifier for the assistant at OpenAI", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation was deleted", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation was last updated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation was created", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "organization_id", + 
existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the evaluation document", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "fine_tuning_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the fine-tuning job", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "error_message", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="modelevaluationstatus" + ), + comment=None, + existing_comment="Current status of the evaluation", + existing_nullable=False, + existing_server_default=sa.text("'pending'::modelevaluationstatus"), + ) + op.alter_column( + "model_evaluation", + "prediction_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL where the prediction data is stored", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "score", + existing_type=postgresql.JSON(astext_type=sa.Text()), + comment=None, + existing_comment="Evaluation scores per metric (e.g., MCC)", + existing_nullable=True, + ) + op.alter_column( + "model_evaluation", + "system_prompt", + existing_type=sa.TEXT(), + comment=None, + existing_comment="System prompt used during evaluation", + 
existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment="Train/test split ratio used", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "base_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URI of the testing data", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the fine-tuned model being evaluated", + existing_nullable=False, + ) + op.alter_column( + "model_evaluation", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the evaluation", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "job", + "created_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "job", + "job_type", + existing_type=postgresql.ENUM("RESPONSE", "LLM_API", name="jobtype"), + comment=None, + existing_comment="Type of job being executed (e.g., RESPONSE, LLM_API)", + existing_nullable=False, + ) + op.alter_column( + "job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESS", "FAILED", name="jobstatus" + ), + comment=None, + existing_comment="Current state of the job (PENDING, PROCESSING, SUCCESS, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "job", + "error_message", + existing_type=sa.VARCHAR(), 
+ comment=None, + existing_comment="Error details if the job fails", + existing_nullable=True, + ) + op.alter_column( + "job", + "trace_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "job", + "task_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Celery task ID returned when job is queued", + existing_nullable=True, + ) + op.alter_column( + "job", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the job", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was deleted", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the training document", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "error_message", + 
existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "test_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URI of the testing data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "train_data_s3_object", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URI of the training data", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "fine_tuned_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the resulting fine-tuned model", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "status", + existing_type=postgresql.ENUM( + "pending", "running", "completed", "failed", name="finetuningstatus" + ), + comment=None, + existing_comment="Current status of the fine-tuning job", + existing_nullable=False, + existing_server_default=sa.text("'pending'::finetuningstatus"), + ) + op.alter_column( + "fine_tuning", + "provider_job_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Fine-tuning job ID returned by the provider", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the fine-tuning job", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "fine_tuning", + "system_prompt", + existing_type=sa.TEXT(), + comment=None, + existing_comment="System prompt used during fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "training_file_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="OpenAI training file identifier", + existing_nullable=True, + ) + op.alter_column( + "fine_tuning", + "split_ratio", + existing_type=sa.DOUBLE_PRECISION(precision=53), + comment=None, + existing_comment="Train/test split ratio for the dataset", + 
existing_nullable=False, + ) + op.alter_column( + "fine_tuning", + "base_model", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Base model used for fine-tuning", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "error_message", + existing_type=sa.TEXT(), + comment=None, + existing_comment="Error message if evaluation failed", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "score", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Evaluation scores (correctness, cosine_similarity, etc.)", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "total_items", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Total number of items evaluated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "object_store_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL of processed evaluation results", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "status", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Evaluation status (pending, processing, completed, failed)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "embedding_batch_job_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the batch job for embedding similarity scoring", + existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "batch_job_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the batch job for responses", + 
existing_nullable=True, + ) + op.alter_column( + "evaluation_run", + "dataset_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Evaluation configuration (model, instructions, etc.)", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "dataset_name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the Langfuse dataset used", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "run_name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the evaluation run", + existing_nullable=False, + ) + op.alter_column( + "evaluation_run", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the evaluation run", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "evaluation_dataset", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation dataset was last updated", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the evaluation dataset was created", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "langfuse_dataset_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Langfuse dataset ID 
for observability integration", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "object_store_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL where the dataset CSV is stored", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "dataset_metadata", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Dataset metadata (item counts, duplication factor, etc.)", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "evaluation_dataset", + "description", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Description of the dataset", + existing_nullable=True, + ) + op.alter_column( + "evaluation_dataset", + "name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the evaluation dataset", + existing_nullable=False, + ) + op.alter_column( + "evaluation_dataset", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the dataset", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text( + "nextval('evaluation_dataset_id_seq'::regclass)" + ), + ) + op.alter_column( + "documentcollection", + "collection_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the collection", + existing_nullable=False, + ) + op.alter_column( + "documentcollection", + "document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the document", + existing_nullable=False, + ) + op.alter_column( + "documentcollection", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the document-collection link", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "document", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the document was deleted", + 
existing_nullable=True, + ) + op.alter_column( + "document", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the document was last updated", + existing_nullable=False, + ) + op.alter_column( + "document", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the document was uploaded", + existing_nullable=False, + ) + op.alter_column( + "document", + "source_document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to source document if this is a transformation", + existing_nullable=True, + ) + op.alter_column( + "document", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "document", + "object_store_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Cloud storage URL for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the document", + existing_nullable=False, + ) + op.alter_column( + "document", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "document", + "fname", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Original filename of the document", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + 
"transformed_document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the resulting transformed document", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "source_document_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the source document being transformed", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "error_message", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Error message if transformation failed", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "trace_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "task_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "doc_transformation_job", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "COMPLETED", "FAILED", name="transformationstatus" + ), + comment=None, + existing_comment="Current status (PENDING, PROCESSING, COMPLETED, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "doc_transformation_job", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the transformation job", + existing_nullable=False, + ) + op.alter_column( + "credential", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the credential was last updated", + existing_nullable=False, + ) + op.alter_column( + "credential", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the credential was created", + existing_nullable=False, + ) + op.alter_column( + "credential", + "credential", + existing_type=sa.VARCHAR(), + comment=None, + 
existing_comment="Encrypted JSON string containing provider-specific API credentials", + existing_nullable=False, + ) + op.alter_column( + "credential", + "provider", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider name like 'openai', 'gemini'", + existing_nullable=False, + ) + op.alter_column( + "credential", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique ID for the credential", + existing_nullable=False, + autoincrement=True, + ) + op.alter_column( + "credential", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "credential", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "credential", + "is_active", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Flag indicating if this credential is currently active and usable", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the version was soft-deleted", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the version was last updated", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the version was created", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "version", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Version number starting at 1", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "config_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference 
to the parent configuration", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the configuration version", + existing_nullable=False, + ) + op.alter_column( + "config_version", + "commit_message", + existing_type=sa.VARCHAR(length=512), + comment=None, + existing_comment="Optional message describing the changes in this version", + existing_nullable=True, + ) + op.alter_column( + "config_version", + "config_blob", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Provider-specific configuration parameters (temperature, max_tokens, etc.)", + existing_nullable=False, + ) + op.alter_column( + "config", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the configuration was deleted", + existing_nullable=True, + ) + op.alter_column( + "config", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the configuration was last updated", + existing_nullable=False, + ) + op.alter_column( + "config", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the configuration was created", + existing_nullable=False, + ) + op.alter_column( + "config", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "config", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the configuration", + existing_nullable=False, + ) + op.alter_column( + "config", + "description", + existing_type=sa.VARCHAR(length=512), + comment=None, + existing_comment="Description of the configuration", + existing_nullable=True, + ) + op.alter_column( + "config", + "name", + existing_type=sa.VARCHAR(length=128), + comment=None, + 
existing_comment="Configuration name", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the job was created", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "collection_id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Reference to the collection", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "error_message", + existing_type=sa.TEXT(), + comment=None, + existing_comment="Error message if the job failed", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "trace_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Tracing ID for correlating logs and traces", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "task_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Celery task ID for async processing", + existing_nullable=True, + ) + op.alter_column( + "collection_jobs", + "action_type", + existing_type=postgresql.ENUM("CREATE", "DELETE", name="collectionactiontype"), + comment=None, + existing_comment="Type of operation (CREATE, DELETE)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "status", + existing_type=postgresql.ENUM( + "PENDING", "PROCESSING", "SUCCESSFUL", "FAILED", name="collectionjobstatus" + ), + comment=None, + existing_comment="Current job status (PENDING, PROCESSING, SUCCESSFUL, FAILED)", + existing_nullable=False, + ) + op.alter_column( + "collection_jobs", + "id", + 
existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the collection job", + existing_nullable=False, + ) + op.alter_column( + "collection", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the collection was deleted", + existing_nullable=True, + ) + op.alter_column( + "collection", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the collection was last updated", + existing_nullable=False, + ) + op.alter_column( + "collection", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the collection was created", + existing_nullable=False, + ) + op.alter_column( + "collection", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "collection", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_name", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Name of the LLM service provider", + existing_nullable=False, + ) + op.alter_column( + "collection", + "llm_service_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="External LLM service identifier (e.g., OpenAI vector store ID)", + existing_nullable=False, + ) + op.alter_column( + "collection", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the collection", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the batch job was last updated", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, 
+ existing_comment="Timestamp when the batch job was started", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "error_message", + existing_type=sa.TEXT(), + comment=None, + existing_comment="Error message if batch failed", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "total_items", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Total number of items in the batch", + existing_nullable=False, + existing_server_default=sa.text("0"), + ) + op.alter_column( + "batch_job", + "raw_output_url", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="S3 URL of raw batch output file", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_status", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider-specific status (e.g., validating, in_progress, completed, failed)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_output_file_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider's output file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_file_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider's input file ID", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "provider_batch_id", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Provider's batch job ID (e.g., OpenAI batch_id)", + existing_nullable=True, + ) + op.alter_column( + "batch_job", + "config", + existing_type=postgresql.JSONB(astext_type=sa.Text()), + comment=None, + existing_comment="Complete batch 
configuration including model, temperature, instructions, tools, etc.", + existing_nullable=False, + existing_server_default=sa.text("'{}'::jsonb"), + ) + op.alter_column( + "batch_job", + "job_type", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Type of batch job (e.g., evaluation, classification, embedding)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "provider", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="LLM provider name (e.g., openai, anthropic)", + existing_nullable=False, + ) + op.alter_column( + "batch_job", + "id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Unique identifier for the batch job", + existing_nullable=False, + autoincrement=True, + existing_server_default=sa.text("nextval('batch_job_id_seq'::regclass)"), + ) + op.alter_column( + "apikey", + "deleted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the API key was deleted", + existing_nullable=True, + ) + op.alter_column( + "apikey", + "updated_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the API key was last updated", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "inserted_at", + existing_type=postgresql.TIMESTAMP(), + comment=None, + existing_comment="Timestamp when the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "is_deleted", + existing_type=sa.BOOLEAN(), + comment=None, + existing_comment="Soft delete flag", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_hash", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Bcrypt hash of the secret of the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "key_prefix", + existing_type=sa.VARCHAR(), + comment=None, + existing_comment="Unique prefix portion of the API key for identification", + existing_nullable=False, + ) + op.alter_column( + 
"apikey", + "id", + existing_type=sa.UUID(), + comment=None, + existing_comment="Unique identifier for the API key", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "user_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the user for whom the API key was created", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "project_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the project", + existing_nullable=False, + ) + op.alter_column( + "apikey", + "organization_id", + existing_type=sa.INTEGER(), + comment=None, + existing_comment="Reference to the organization", + existing_nullable=False, + ) + # ### end Alembic commands ### diff --git a/backend/app/models/api_key.py b/backend/app/models/api_key.py index 1da56382..516073f2 100644 --- a/backend/app/models/api_key.py +++ b/backend/app/models/api_key.py @@ -1,21 +1,35 @@ -from uuid import UUID, uuid4 -import secrets -import base64 from datetime import datetime -from typing import Optional, List -from sqlmodel import SQLModel, Field, Relationship +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel from app.core.util import now class APIKeyBase(SQLModel): + """Base model for API keys with foreign key fields.""" + + # Foreign keys organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + user_id: int = Field( + foreign_key="user.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={ + "comment": "Reference to the user for whom the API key was created" + }, ) - user_id: int = 
Field(foreign_key="user.id", nullable=False, ondelete="CASCADE") class APIKeyPublic(APIKeyBase): @@ -32,14 +46,44 @@ class APIKeyCreateResponse(APIKeyPublic): class APIKey(APIKeyBase, table=True): - id: UUID = Field(default_factory=uuid4, primary_key=True) + """Database model for API keys.""" + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the API key"}, + ) key_prefix: str = Field( - unique=True, index=True, nullable=False - ) # Unique identifier from the key - key_hash: str = Field(nullable=False) # bcrypt hash of the secret portion - - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - is_deleted: bool = Field(default=False, nullable=False) - deleted_at: Optional[datetime] = Field(default=None, nullable=True) + unique=True, + index=True, + nullable=False, + sa_column_kwargs={ + "comment": "Unique prefix portion of the API key for identification" + }, + ) + key_hash: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Bcrypt hash of the secret of the API key"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the API key was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the API key was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the API key was deleted"}, + ) diff --git a/backend/app/models/assistants.py b/backend/app/models/assistants.py index 7e707068..bb9b3318 100644 --- a/backend/app/models/assistants.py +++ b/backend/app/models/assistants.py @@ -1,11 +1,12 @@ from datetime import datetime -from typing 
import List, Optional from sqlalchemy import Column, String, Text from sqlalchemy.dialects.postgresql import ARRAY from sqlmodel import Field, Relationship, SQLModel, UniqueConstraint from app.core.util import now +from app.models.organization import Organization +from app.models.project import Project class AssistantBase(SQLModel): @@ -17,7 +18,7 @@ class AssistantBase(SQLModel): name: str instructions: str = Field(sa_column=Column(Text, nullable=False)) model: str - vector_store_ids: List[str] = Field( + vector_store_ids: list[str] = Field( default_factory=list, sa_column=Column(ARRAY(String)) ) temperature: float = 0.1 @@ -31,17 +32,88 @@ class AssistantBase(SQLModel): class Assistant(AssistantBase, table=True): + """OpenAI assistant configuration and metadata.""" + __tablename__ = "openai_assistant" - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - is_deleted: bool = Field(default=False, nullable=False) - deleted_at: Optional[datetime] = Field(default=None, nullable=True) + id: int | None = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the assistant"}, + ) + assistant_id: str = Field( + index=True, + sa_column_kwargs={"comment": "Unique identifier for the assistant at OpenAI"}, + ) + name: str = Field( + sa_column_kwargs={"comment": "Name of the assistant"}, + ) + instructions: str = Field( + sa_column=Column( + Text, nullable=False, comment="System instructions for the assistant" + ) + ) + model: str = Field( + sa_column_kwargs={"comment": "OpenAI model used by the assistant"}, + ) + vector_store_ids: list[str] = Field( + default_factory=list, + sa_column=Column( + ARRAY(String), comment="List of OpenAI vector store IDs attached" + ), + ) + temperature: float = Field( + default=0.1, + sa_column_kwargs={ + "comment": "Parameter that controls the creativity or randomness of the text 
generated by model" + }, + ) + max_num_results: int = Field( + default=20, + sa_column_kwargs={ + "comment": "Parameter that controls maximum number of results to return" + }, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + project_id: int = Field( + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, + ) + organization_id: int = Field( + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the assistant was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the assistant was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the assistant was deleted"}, + ) # Relationships - project: "Project" = Relationship(back_populates="assistants") - organization: "Organization" = Relationship(back_populates="assistants") + project: Project = Relationship(back_populates="assistants") + organization: Organization = Relationship(back_populates="assistants") class AssistantCreate(SQLModel): diff --git a/backend/app/models/batch_job.py b/backend/app/models/batch_job.py index 68b79762..d34c040f 100644 --- a/backend/app/models/batch_job.py +++ b/backend/app/models/batch_job.py @@ -13,7 +13,7 @@ class BatchJob(SQLModel, table=True): - """Batch job table for tracking async LLM batch operations.""" + """Database model for BatchJob operations.""" __tablename__ = "batch_job" __table_args__ = ( @@ -21,26 +21,37 @@ class BatchJob(SQLModel, table=True): Index("idx_batch_job_status_project", "provider_status", "project_id"), ) - id: 
int | None = Field(default=None, primary_key=True) + id: int | None = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the batch job"}, + ) # Provider and job type provider: str = Field( description="LLM provider name (e.g., 'openai', 'anthropic')", + sa_column_kwargs={"comment": "LLM provider name (e.g., openai, anthropic)"}, ) job_type: str = Field( index=True, description=( "Type of batch job (e.g., 'evaluation', 'classification', 'embedding')" ), + sa_column_kwargs={ + "comment": "Type of batch job (e.g., evaluation, classification, embedding)" + }, ) # Batch configuration - stores all provider-specific config config: dict[str, Any] = Field( default_factory=dict, - sa_column=Column(JSONB, nullable=False), + sa_column=Column( + JSONB, + nullable=False, + comment="Complete batch configuration including model, temperature, instructions, tools, etc.", + ), description=( - "Complete batch configuration including model, temperature, " - "instructions, tools, etc." + "Complete batch configuration including model, temperature, instructions, tools, etc." 
), ) @@ -48,14 +59,17 @@ class BatchJob(SQLModel, table=True): provider_batch_id: str | None = Field( default=None, description="Provider's batch job ID (e.g., OpenAI batch_id)", + sa_column_kwargs={"comment": "Provider's batch job ID (e.g., OpenAI batch_id)"}, ) provider_file_id: str | None = Field( default=None, description="Provider's input file ID", + sa_column_kwargs={"comment": "Provider's input file ID"}, ) provider_output_file_id: str | None = Field( default=None, description="Provider's output file ID", + sa_column_kwargs={"comment": "Provider's output file ID"}, ) # Provider status tracking @@ -65,40 +79,56 @@ class BatchJob(SQLModel, table=True): "Provider-specific status (e.g., OpenAI: validating, in_progress, " "finalizing, completed, failed, expired, cancelling, cancelled)" ), + sa_column_kwargs={ + "comment": "Provider-specific status (e.g., validating, in_progress, completed, failed)" + }, ) # Raw results (before parent-specific processing) raw_output_url: str | None = Field( default=None, description="S3 URL of raw batch output file", + sa_column_kwargs={"comment": "S3 URL of raw batch output file"}, ) total_items: int = Field( default=0, description="Total number of items in the batch", + sa_column_kwargs={"comment": "Total number of items in the batch"}, ) # Error handling error_message: str | None = Field( default=None, - sa_column=Column(Text, nullable=True), + sa_column=Column(Text, nullable=True, comment="Error message if batch failed"), description="Error message if batch failed", ) # Foreign keys organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE", index=True + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + index=True, + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE", index=True + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + index=True, + 
sa_column_kwargs={"comment": "Reference to the project"}, ) # Timestamps inserted_at: datetime = Field( - default_factory=now, description="The timestamp when the batch job was started" + default_factory=now, + description="The timestamp when the batch job was started", + sa_column_kwargs={"comment": "Timestamp when the batch job was started"}, ) updated_at: datetime = Field( default_factory=now, description="The timestamp when the batch job was last updated", + sa_column_kwargs={"comment": "Timestamp when the batch job was last updated"}, ) # Relationships diff --git a/backend/app/models/collection.py b/backend/app/models/collection.py index e09f5622..353deef0 100644 --- a/backend/app/models/collection.py +++ b/backend/app/models/collection.py @@ -1,38 +1,65 @@ -from uuid import UUID, uuid4 from datetime import datetime -from typing import Any, Optional +from typing import Any +from uuid import UUID, uuid4 -from sqlmodel import Field, Relationship, SQLModel from pydantic import HttpUrl, model_validator +from sqlmodel import Field, Relationship, SQLModel from app.core.util import now from app.models.document import DocumentPublic + from .organization import Organization from .project import Project class Collection(SQLModel, table=True): - id: UUID = Field(default_factory=uuid4, primary_key=True) + """Database model for Collection operations.""" + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the collection"}, + ) + llm_service_id: str = Field( + nullable=False, + sa_column_kwargs={ + "comment": "External LLM service identifier (e.g., OpenAI vector store ID)" + }, + ) + llm_service_name: str = Field( + nullable=False, + sa_column_kwargs={"comment": "Name of the LLM service provider"}, + ) + + # Foreign keys organization_id: int = Field( foreign_key="organization.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) - project_id: int = Field( 
foreign_key="project.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - llm_service_id: str = Field(nullable=False) - llm_service_name: str = Field(nullable=False) - - inserted_at: datetime = Field(default_factory=now) - updated_at: datetime = Field(default_factory=now) - deleted_at: Optional[datetime] = None + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the collection was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the collection was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + sa_column_kwargs={"comment": "Timestamp when the collection was deleted"}, + ) + # Relationships organization: Organization = Relationship(back_populates="collections") project: Project = Relationship(back_populates="collections") @@ -59,7 +86,7 @@ class AssistantOptions(SQLModel): # Fields to be passed along to OpenAI. They must be a subset of # parameters accepted by the OpenAI.clien.beta.assistants.create # API. 
- model: Optional[str] = Field( + model: str | None = Field( default=None, description=( "**[To Be Deprecated]** " @@ -69,7 +96,7 @@ class AssistantOptions(SQLModel): ), ) - instructions: Optional[str] = Field( + instructions: str | None = Field( default=None, description=( "**[To Be Deprecated]** " @@ -112,7 +139,7 @@ def norm(x: Any) -> Any: class CallbackRequest(SQLModel): - callback_url: Optional[HttpUrl] = Field( + callback_url: HttpUrl | None = Field( default=None, description="URL to call to report endpoint status", ) diff --git a/backend/app/models/collection_job.py b/backend/app/models/collection_job.py index 4739b16c..60be4eec 100644 --- a/backend/app/models/collection_job.py +++ b/backend/app/models/collection_job.py @@ -1,12 +1,11 @@ +from datetime import datetime from enum import Enum from uuid import UUID, uuid4 -from datetime import datetime -from sqlmodel import Field, SQLModel, Column, Text -from pydantic import ConfigDict +from sqlmodel import Column, Field, SQLModel, Text from app.core.util import now -from app.models.collection import CollectionPublic, CollectionIDPublic +from app.models.collection import CollectionIDPublic, CollectionPublic class CollectionJobStatus(str, Enum): @@ -22,41 +21,70 @@ class CollectionActionType(str, Enum): class CollectionJob(SQLModel, table=True): - """Database model for tracking collection operations.""" + """Database model for CollectionJob operations.""" __tablename__ = "collection_jobs" - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the collection job"}, + ) status: CollectionJobStatus = Field( default=CollectionJobStatus.PENDING, nullable=False, description="Current job status", + sa_column_kwargs={ + "comment": "Current job status (PENDING, PROCESSING, SUCCESSFUL, FAILED)" + }, ) action_type: CollectionActionType = Field( - nullable=False, description="Type of operation" + 
nullable=False, + description="Type of operation", + sa_column_kwargs={"comment": "Type of operation (CREATE, DELETE)"}, + ) + task_id: str | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Celery task ID for async processing"}, + ) + trace_id: str | None = Field( + default=None, + description="Tracing ID for correlating logs and traces.", + sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, + ) + error_message: str | None = Field( + sa_column=Column( + Text, nullable=True, comment="Error message if the job failed" + ), + ) + + # Foreign keys collection_id: UUID | None = Field( - foreign_key="collection.id", nullable=True, ondelete="CASCADE" + foreign_key="collection.id", + nullable=True, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the collection"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" - ) - task_id: str = Field(nullable=True) - trace_id: str | None = Field( - default=None, description="Tracing ID for correlating logs and traces." 
+ foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - error_message: str | None = Field(sa_column=Column(Text, nullable=True)) + # Timestamps inserted_at: datetime = Field( default_factory=now, nullable=False, description="When the job record was created", + sa_column_kwargs={"comment": "Timestamp when the job was created"}, ) - updated_at: datetime = Field( default_factory=now, nullable=False, description="Last time the job record was updated", + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, ) @property diff --git a/backend/app/models/config/config.py b/backend/app/models/config/config.py index 18bbbcdf..9155254a 100644 --- a/backend/app/models/config/config.py +++ b/backend/app/models/config/config.py @@ -13,9 +13,17 @@ class ConfigBase(SQLModel): """Base model for LLM configuration metadata""" - name: str = Field(min_length=1, max_length=128, description="Config name") + name: str = Field( + min_length=1, + max_length=128, + description="Config name", + sa_column_kwargs={"comment": "Configuration name"}, + ) description: str | None = Field( - default=None, max_length=512, description="Optional description" + default=None, + max_length=512, + description="Description of the configuration", + sa_column_kwargs={"comment": "Description of the configuration"}, ) @@ -39,18 +47,37 @@ class Config(ConfigBase, table=True): ), ) - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the configuration"}, + ) project_id: int = Field( foreign_key="project.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + inserted_at: datetime = Field( + default_factory=now, + 
nullable=False, + sa_column_kwargs={"comment": "Timestamp when the configuration was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the configuration was last updated" + }, + ) - deleted_at: datetime | None = Field(default=None, nullable=True) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the configuration was deleted"}, + ) class ConfigCreate(ConfigBase): @@ -61,7 +88,7 @@ class ConfigCreate(ConfigBase): commit_message: str | None = Field( default=None, max_length=512, - description="Optional message describing the changes in this version", + description="Message describing the changes in this version", ) @field_validator("config_blob") diff --git a/backend/app/models/config/version.py b/backend/app/models/config/version.py index bb44531d..5a374582 100644 --- a/backend/app/models/config/version.py +++ b/backend/app/models/config/version.py @@ -13,13 +13,20 @@ class ConfigVersionBase(SQLModel): config_blob: dict[str, Any] = Field( - sa_column=sa.Column(JSONB, nullable=False), + sa_column=sa.Column( + JSONB, + nullable=False, + comment="Provider-specific configuration parameters (temperature, max_tokens, etc.)", + ), description="Provider-specific configuration parameters (temperature, max_tokens, etc.)", ) commit_message: str | None = Field( default=None, max_length=512, description="Optional message describing the changes in this version", + sa_column_kwargs={ + "comment": "Optional message describing the changes in this version" + }, ) @field_validator("config_blob") @@ -43,21 +50,41 @@ class ConfigVersion(ConfigVersionBase, table=True): ), ) - id: UUID = Field(default_factory=uuid4, primary_key=True) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the configuration version"}, + ) config_id: UUID = Field( foreign_key="config.id", 
nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the parent configuration"}, ) version: int = Field( - nullable=False, description="Version number starting at 1", ge=1 + nullable=False, + description="Version number starting at 1", + ge=1, + sa_column_kwargs={"comment": "Version number starting at 1"}, ) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the version was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the version was last updated"}, + ) - deleted_at: datetime | None = Field(default=None, nullable=True) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the version was soft-deleted"}, + ) class ConfigVersionCreate(ConfigVersionBase): diff --git a/backend/app/models/credentials.py b/backend/app/models/credentials.py index 03230b7b..6e284dbf 100644 --- a/backend/app/models/credentials.py +++ b/backend/app/models/credentials.py @@ -1,19 +1,38 @@ -from typing import Dict, Any, Optional +from datetime import datetime +from typing import Any + import sqlalchemy as sa from sqlmodel import Field, Relationship, SQLModel -from datetime import datetime from app.core.util import now +from app.models.organization import Organization +from app.models.project import Project class CredsBase(SQLModel): + """Base model for credentials with foreign keys and common fields.""" + + is_active: bool = Field( + default=True, + nullable=False, + sa_column_kwargs={ + "comment": "Flag indicating if this credential is currently active and usable" + }, + ) + + # Foreign keys organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + 
foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = Field( - default=None, foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - is_active: bool = True class CredsCreate(SQLModel): @@ -23,7 +42,7 @@ class CredsCreate(SQLModel): """ is_active: bool = True - credential: Dict[str, Any] = Field( + credential: dict[str, Any] = Field( default=None, description="Dictionary mapping provider names to their credentials", ) @@ -37,10 +56,10 @@ class CredsUpdate(SQLModel): provider: str = Field( description="Name of the provider to update/add credentials for" ) - credential: Dict[str, Any] = Field( + credential: dict[str, Any] = Field( description="Credentials for the specified provider", ) - is_active: Optional[bool] = Field( + is_active: bool | None = Field( default=None, description="Whether the credentials are active" ) @@ -59,25 +78,40 @@ class Credential(CredsBase, table=True): ), ) - id: int = Field(default=None, primary_key=True) + id: int | None = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique ID for the credential"}, + ) provider: str = Field( - index=True, description="Provider name like 'openai', 'gemini'" + index=True, + nullable=False, + description="Provider name like 'openai', 'gemini'", + sa_column_kwargs={"comment": "Provider name like 'openai', 'gemini'"}, ) credential: str = Field( - sa_column=sa.Column(sa.String, nullable=False), - description="Encrypted provider-specific credentials", + nullable=False, + description="Encrypted JSON string containing provider-specific API credentials", + sa_column_kwargs={ + "comment": "Encrypted JSON string containing provider-specific API credentials" + }, ) + + # Timestamps inserted_at: datetime = Field( default_factory=now, - 
sa_column=sa.Column(sa.DateTime, default=datetime.utcnow, nullable=False), + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the credential was created"}, ) updated_at: datetime = Field( default_factory=now, - sa_column=sa.Column(sa.DateTime, onupdate=datetime.utcnow, nullable=False), + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the credential was last updated"}, ) - organization: Optional["Organization"] = Relationship(back_populates="creds") - project: Optional["Project"] = Relationship(back_populates="creds") + # Relationships + organization: Organization | None = Relationship(back_populates="creds") + project: Project | None = Relationship(back_populates="creds") def to_public(self) -> "CredsPublic": """Convert the database model to a public model with decrypted credentials.""" @@ -102,6 +136,6 @@ class CredsPublic(CredsBase): id: int provider: str - credential: Optional[Dict[str, Any]] = None + credential: dict[str, Any] | None = None inserted_at: datetime updated_at: datetime diff --git a/backend/app/models/doc_transformation_job.py b/backend/app/models/doc_transformation_job.py index 139825ee..e91d14ea 100644 --- a/backend/app/models/doc_transformation_job.py +++ b/backend/app/models/doc_transformation_job.py @@ -1,9 +1,9 @@ import enum -from uuid import UUID, uuid4 from datetime import datetime +from uuid import UUID, uuid4 -from sqlmodel import SQLModel, Field from pydantic import ConfigDict +from sqlmodel import Field, SQLModel from app.core.util import now @@ -16,21 +16,58 @@ class TransformationStatus(str, enum.Enum): class DocTransformationJob(SQLModel, table=True): + """Database model for DocTransformationJob operations.""" + __tablename__ = "doc_transformation_job" - id: UUID = Field(default_factory=uuid4, primary_key=True) - source_document_id: UUID = Field(foreign_key="document.id") - transformed_document_id: UUID | None = Field( - default=None, foreign_key="document.id" + id: UUID = Field( + 
default_factory=uuid4, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the transformation job"}, + ) + status: TransformationStatus = Field( + default=TransformationStatus.PENDING, + sa_column_kwargs={ + "comment": "Current status (PENDING, PROCESSING, COMPLETED, FAILED)" + }, + ) + task_id: str | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Celery task ID for async processing"}, ) - status: TransformationStatus = Field(default=TransformationStatus.PENDING) - task_id: str | None = Field(default=None, nullable=True) trace_id: str | None = Field( - default=None, description="Tracing ID for correlating logs and traces." + default=None, + description="Tracing ID for correlating logs and traces.", + sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, + ) + error_message: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Error message if transformation failed"}, + ) + + # Foreign keys + source_document_id: UUID = Field( + foreign_key="document.id", + sa_column_kwargs={ + "comment": "Reference to the source document being transformed" + }, + ) + transformed_document_id: UUID | None = Field( + default=None, + foreign_key="document.id", + sa_column_kwargs={"comment": "Reference to the resulting transformed document"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, ) - error_message: str | None = Field(default=None) - inserted_at: datetime = Field(default_factory=now) - updated_at: datetime = Field(default_factory=now) @property def job_id(self) -> UUID: diff --git a/backend/app/models/document.py b/backend/app/models/document.py index 60d28142..bffa7b39 100644 --- a/backend/app/models/document.py +++ b/backend/app/models/document.py @@ -1,6 
+1,5 @@ -from uuid import UUID, uuid4 from datetime import datetime -from typing import Optional +from uuid import UUID, uuid4 from sqlmodel import Field, SQLModel @@ -9,35 +8,64 @@ class DocumentBase(SQLModel): + """Base model for documents with common fields.""" + + fname: str = Field( + description="The original filename of the document", + sa_column_kwargs={"comment": "Original filename of the document"}, + ) + + # Foreign keys project_id: int = Field( description="The ID of the project to which the document belongs", foreign_key="project.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) - fname: str = Field(description="The original filename of the document") class Document(DocumentBase, table=True): + """Database model for documents.""" + id: UUID = Field( default_factory=uuid4, primary_key=True, description="The unique identifier of the document", + sa_column_kwargs={"comment": "Unique identifier for the document"}, + ) + object_store_url: str = Field( + sa_column_kwargs={"comment": "Cloud storage URL for the document"}, + ) + is_deleted: bool = Field( + default=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + source_document_id: UUID | None = Field( + default=None, + foreign_key="document.id", + nullable=True, + sa_column_kwargs={ + "comment": "Reference to source document if this is a transformation" + }, ) - object_store_url: str + + # Timestamps inserted_at: datetime = Field( - default_factory=now, description="The timestamp when the document was inserted" + default_factory=now, + description="The timestamp when the document was inserted", + sa_column_kwargs={"comment": "Timestamp when the document was uploaded"}, ) updated_at: datetime = Field( default_factory=now, description="The timestamp when the document was last updated", + sa_column_kwargs={"comment": "Timestamp when the document was last updated"}, ) - is_deleted: bool = Field(default=False) - deleted_at: 
datetime | None - source_document_id: Optional[UUID] = Field( + deleted_at: datetime | None = Field( default=None, - foreign_key="document.id", - nullable=True, + sa_column_kwargs={"comment": "Timestamp when the document was deleted"}, ) diff --git a/backend/app/models/document_collection.py b/backend/app/models/document_collection.py index 0e43259e..93db6df3 100644 --- a/backend/app/models/document_collection.py +++ b/backend/app/models/document_collection.py @@ -1,23 +1,27 @@ from uuid import UUID -from typing import Optional from sqlmodel import Field, SQLModel -from app.core.util import now - class DocumentCollection(SQLModel, table=True): - id: Optional[int] = Field( + """Junction table linking documents to collections.""" + + id: int | None = Field( default=None, primary_key=True, + sa_column_kwargs={ + "comment": "Unique identifier for the document-collection link" + }, ) document_id: UUID = Field( foreign_key="document.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the document"}, ) collection_id: UUID = Field( foreign_key="collection.id", nullable=False, ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the collection"}, ) diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py index d86db892..f99fbb27 100644 --- a/backend/app/models/evaluation.py +++ b/backend/app/models/evaluation.py @@ -83,18 +83,32 @@ class EvaluationDataset(SQLModel, table=True): ), ) - id: int = SQLField(default=None, primary_key=True) + id: int = SQLField( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the dataset"}, + ) # Dataset information - name: str = SQLField(index=True, description="Name of the dataset") + name: str = SQLField( + index=True, + description="Name of the dataset", + sa_column_kwargs={"comment": "Name of the evaluation dataset"}, + ) description: str | None = SQLField( - default=None, description="Optional description of the dataset" + 
default=None, + description="Optional description of the dataset", + sa_column_kwargs={"comment": "Description of the dataset"}, ) # Dataset metadata stored as JSONB dataset_metadata: dict[str, Any] = SQLField( default_factory=dict, - sa_column=Column(JSONB, nullable=False), + sa_column=Column( + JSONB, + nullable=False, + comment="Dataset metadata (item counts, duplication factor, etc.)", + ), description=( "Dataset metadata (original_items_count, total_items_count, " "duplication_factor)" @@ -103,23 +117,47 @@ class EvaluationDataset(SQLModel, table=True): # Storage references object_store_url: str | None = SQLField( - default=None, description="Object store URL where CSV is stored" + default=None, + description="Object store URL where CSV is stored", + sa_column_kwargs={"comment": "S3 URL where the dataset CSV is stored"}, ) langfuse_dataset_id: str | None = SQLField( - default=None, description="Langfuse dataset ID for reference" + default=None, + description="Langfuse dataset ID for reference", + sa_column_kwargs={ + "comment": "Langfuse dataset ID for observability integration" + }, ) # Foreign keys organization_id: int = SQLField( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = SQLField( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) # Timestamps - inserted_at: datetime = SQLField(default_factory=now, nullable=False) - updated_at: datetime = SQLField(default_factory=now, nullable=False) + inserted_at: datetime = SQLField( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the evaluation dataset was created" + }, + ) + updated_at: datetime = SQLField( + default_factory=now, + nullable=False, + 
sa_column_kwargs={ + "comment": "Timestamp when the evaluation dataset was last updated" + }, + ) # Relationships project: "Project" = Relationship() @@ -138,16 +176,31 @@ class EvaluationRun(SQLModel, table=True): Index("idx_eval_run_status_project", "status", "project_id"), ) - id: int = SQLField(default=None, primary_key=True) + id: int = SQLField( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the evaluation run"}, + ) # Input fields (provided by user) - run_name: str = SQLField(index=True, description="Name of the evaluation run") - dataset_name: str = SQLField(description="Name of the Langfuse dataset") + run_name: str = SQLField( + index=True, + description="Name of the evaluation run", + sa_column_kwargs={"comment": "Name of the evaluation run"}, + ) + dataset_name: str = SQLField( + description="Name of the Langfuse dataset", + sa_column_kwargs={"comment": "Name of the Langfuse dataset used"}, + ) # Config field - dict requires sa_column config: dict[str, Any] = SQLField( default_factory=dict, - sa_column=Column(JSONB, nullable=False), + sa_column=Column( + JSONB, + nullable=False, + comment="Evaluation configuration (model, instructions, etc.)", + ), description="Evaluation configuration", ) @@ -157,6 +210,7 @@ class EvaluationRun(SQLModel, table=True): nullable=False, ondelete="CASCADE", description="Reference to the evaluation_dataset used for this run", + sa_column_kwargs={"comment": "Reference to the evaluation dataset"}, ) # Batch job references @@ -167,6 +221,7 @@ class EvaluationRun(SQLModel, table=True): description=( "Reference to the batch_job that processes this evaluation (responses)" ), + sa_column_kwargs={"comment": "Reference to the batch job for responses"}, ) embedding_batch_job_id: int | None = SQLField( default=None, @@ -174,51 +229,78 @@ class EvaluationRun(SQLModel, table=True): nullable=True, ondelete="SET NULL", description="Reference to the batch_job for embedding-based similarity 
scoring", + sa_column_kwargs={ + "comment": "Reference to the batch job for embedding similarity scoring" + }, ) # Output/Status fields (updated by system during processing) status: str = SQLField( default="pending", description="Overall evaluation status: pending, processing, completed, failed", + sa_column_kwargs={ + "comment": "Evaluation status (pending, processing, completed, failed)" + }, ) object_store_url: str | None = SQLField( default=None, description="Object store URL of processed evaluation results for future reference", + sa_column_kwargs={"comment": "S3 URL of processed evaluation results"}, ) total_items: int = SQLField( - default=0, description="Total number of items evaluated (set during processing)" + default=0, + description="Total number of items evaluated (set during processing)", + sa_column_kwargs={"comment": "Total number of items evaluated"}, ) # Score field - dict requires sa_column score: dict[str, Any] | None = SQLField( default=None, - sa_column=Column(JSONB, nullable=True), + sa_column=Column( + JSONB, + nullable=True, + comment="Evaluation scores (correctness, cosine_similarity, etc.)", + ), description="Evaluation scores (e.g., correctness, cosine_similarity, etc.)", ) # Error message field error_message: str | None = SQLField( default=None, - sa_column=Column(Text, nullable=True), + sa_column=Column( + Text, nullable=True, comment="Error message if evaluation failed" + ), description="Error message if failed", ) # Foreign keys organization_id: int = SQLField( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) project_id: int = SQLField( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) # Timestamps inserted_at: datetime = Field( 
default_factory=now, + nullable=False, description="The timestamp when the evaluation run was started", + sa_column_kwargs={"comment": "Timestamp when the evaluation run was started"}, ) updated_at: datetime = Field( default_factory=now, + nullable=False, description="The timestamp when the evaluation run was last updated", + sa_column_kwargs={ + "comment": "Timestamp when the evaluation run was last updated" + }, ) # Relationships diff --git a/backend/app/models/fine_tuning.py b/backend/app/models/fine_tuning.py index 4e326ee5..6b174da9 100644 --- a/backend/app/models/fine_tuning.py +++ b/backend/app/models/fine_tuning.py @@ -1,13 +1,13 @@ -from typing import Optional -from uuid import UUID -from enum import Enum from datetime import datetime +from enum import Enum +from uuid import UUID -from sqlalchemy import Column, Text from pydantic import field_validator -from sqlmodel import SQLModel, Field, Relationship +from sqlalchemy import Column, Text +from sqlmodel import Field, Relationship, SQLModel from app.core.util import now +from app.models.project import Project class FineTuningStatus(str, Enum): @@ -22,7 +22,7 @@ class FineTuningJobBase(SQLModel): base_model: str = Field(nullable=False, description="Base model for fine-tuning") split_ratio: float = Field(nullable=False) document_id: UUID = Field(foreign_key="document.id", nullable=False) - training_file_id: Optional[str] = Field(default=None) + training_file_id: str | None = Field(default=None) system_prompt: str = Field(sa_column=Column(Text, nullable=False)) @@ -55,50 +55,114 @@ def check_prompt(cls, v): class Fine_Tuning(FineTuningJobBase, table=True): """Database model for tracking fine-tuning jobs.""" - id: int = Field(primary_key=True) + id: int = Field( + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the fine-tuning job"}, + ) + base_model: str = Field( + nullable=False, + description="Base model for fine-tuning", + sa_column_kwargs={"comment": "Base model used for 
fine-tuning"}, + ) + split_ratio: float = Field( + nullable=False, + sa_column_kwargs={"comment": "Train/test split ratio for the dataset"}, + ) + training_file_id: str | None = Field( + default=None, + sa_column_kwargs={"comment": "OpenAI training file identifier"}, + ) + system_prompt: str = Field( + sa_column=Column( + Text, nullable=False, comment="System prompt used during fine-tuning" + ) + ) provider_job_id: str | None = Field( - default=None, description="Fine tuning Job ID returned by OpenAI" + default=None, + description="Fine tuning Job ID returned by OpenAI", + sa_column_kwargs={"comment": "Fine-tuning job ID returned by the provider"}, ) - status: FineTuningStatus = ( - Field(default=FineTuningStatus.pending, description="Fine tuning status"), + status: FineTuningStatus = Field( + default=FineTuningStatus.pending, + description="Fine tuning status", + sa_column_kwargs={"comment": "Current status of the fine-tuning job"}, ) fine_tuned_model: str | None = Field( - default=None, description="Final fine tuned model name from OpenAI" + default=None, + description="Final fine tuned model name from OpenAI", + sa_column_kwargs={"comment": "Name of the resulting fine-tuned model"}, ) train_data_s3_object: str | None = Field( - default=None, description="S3 URI of the training data stored ins S3" + default=None, + description="S3 URI of the training data stored in S3", + sa_column_kwargs={"comment": "S3 URI of the training data"}, ) test_data_s3_object: str | None = Field( - default=None, description="S3 URI of the testing data stored ins S3" + default=None, + description="S3 URI of the testing data stored in S3", + sa_column_kwargs={"comment": "S3 URI of the testing data"}, ) error_message: str | None = Field( - default=None, description="error message for when something failed" + default=None, + description="Error message for when something failed", + sa_column_kwargs={"comment": "Error message if the job failed"}, + ) + is_deleted: bool = Field( + 
default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + document_id: UUID = Field( + foreign_key="document.id", + nullable=False, + sa_column_kwargs={"comment": "Reference to the training document"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) - is_deleted: bool = Field(default=False, nullable=False) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - deleted_at: datetime | None = Field(default=None, nullable=True) + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the job was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the job was deleted"}, + ) - project: "Project" = Relationship(back_populates="fine_tuning") + # Relationships + project: Project = Relationship(back_populates="fine_tuning") model_evaluation: "ModelEvaluation" = Relationship(back_populates="fine_tuning") class FineTuningUpdate(SQLModel): - training_file_id: Optional[str] = None - train_data_s3_object: Optional[str] = None - test_data_s3_object: Optional[str] = None - split_ratio: Optional[float] = None - provider_job_id: Optional[str] = None - fine_tuned_model: Optional[str] = None - status: Optional[str] = None - error_message: 
Optional[str] = None + training_file_id: str | None = None + train_data_s3_object: str | None = None + test_data_s3_object: str | None = None + split_ratio: float | None = None + provider_job_id: str | None = None + fine_tuned_model: str | None = None + status: str | None = None + error_message: str | None = None class FineTuningJobPublic(SQLModel): diff --git a/backend/app/models/job.py b/backend/app/models/job.py index 62851f5f..b6a1a5ae 100644 --- a/backend/app/models/job.py +++ b/backend/app/models/job.py @@ -1,8 +1,9 @@ from datetime import datetime from enum import Enum -from uuid import uuid4, UUID +from uuid import UUID, uuid4 + +from sqlmodel import Field, SQLModel -from sqlmodel import SQLModel, Field from app.core.util import now @@ -19,29 +20,53 @@ class JobType(str, Enum): class Job(SQLModel, table=True): + """Database model for tracking async jobs.""" + __tablename__ = "job" id: UUID = Field( default_factory=uuid4, primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the job"}, ) task_id: str | None = Field( - nullable=True, description="Celery task ID returned when job is queued." + nullable=True, + description="Celery task ID returned when job is queued.", + sa_column_kwargs={"comment": "Celery task ID returned when job is queued"}, ) trace_id: str | None = Field( - default=None, description="Tracing ID for correlating logs and traces." + default=None, + description="Tracing ID for correlating logs and traces.", + sa_column_kwargs={"comment": "Tracing ID for correlating logs and traces"}, ) error_message: str | None = Field( - default=None, description="Error details if the job fails." + default=None, + description="Error details if the job fails.", + sa_column_kwargs={"comment": "Error details if the job fails"}, ) status: JobStatus = Field( - default=JobStatus.PENDING, description="Current state of the job." 
+ default=JobStatus.PENDING, + description="Current state of the job.", + sa_column_kwargs={ + "comment": "Current state of the job (PENDING, PROCESSING, SUCCESS, FAILED)" + }, ) job_type: JobType = Field( - description="Type of job being executed (e.g., response, ingestion)." + description="Type of job being executed (e.g., response, ingestion).", + sa_column_kwargs={ + "comment": "Type of job being executed (e.g., RESPONSE, LLM_API)" + }, + ) + + # Timestamps + created_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the job was last updated"}, ) - created_at: datetime = Field(default_factory=now) - updated_at: datetime = Field(default_factory=now) class JobUpdate(SQLModel): diff --git a/backend/app/models/model_evaluation.py b/backend/app/models/model_evaluation.py index 900b57b6..5354d6c3 100644 --- a/backend/app/models/model_evaluation.py +++ b/backend/app/models/model_evaluation.py @@ -1,14 +1,14 @@ -from typing import Optional -from uuid import UUID -from enum import Enum from datetime import datetime +from enum import Enum +from uuid import UUID -from sqlmodel import SQLModel, Field, Relationship +from pydantic import field_validator from sqlalchemy import Column, Text from sqlalchemy.dialects.postgresql import JSON -from pydantic import field_validator +from sqlmodel import Field, Relationship, SQLModel from app.core.util import now +from app.models.project import Project class ModelEvaluationStatus(str, Enum): @@ -36,60 +36,118 @@ def dedupe_ids(cls, v: list[int]) -> list[int]: class ModelEvaluation(ModelEvaluationBase, table=True): - """Database model for keeping a record of model evaluation""" + """Database model for keeping a record of model evaluation.""" __tablename__ = "model_evaluation" - id: int = Field(primary_key=True) - - document_id: UUID = Field( - 
foreign_key="document.id", - nullable=False, + id: int = Field( + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the evaluation"}, + ) + fine_tuned_model: str = Field( + description="Fine-tuned model name from OpenAI", + sa_column_kwargs={"comment": "Name of the fine-tuned model being evaluated"}, ) - fine_tuned_model: str = Field(description="fine tuned model name from OpenAI") test_data_s3_object: str = Field( - description="S3 URI of the testing data stored in S3" + description="S3 URI of the testing data stored in S3", + sa_column_kwargs={"comment": "S3 URI of the testing data"}, + ) + base_model: str = Field( + nullable=False, + description="Base model used for fine-tuning", + sa_column_kwargs={"comment": "Base model used for fine-tuning"}, ) - base_model: str = Field(nullable=False, description="Base model for fine-tuning") split_ratio: float = Field( - nullable=False, description="the ratio the dataset was divided in" + nullable=False, + description="The ratio the dataset was divided in", + sa_column_kwargs={"comment": "Train/test split ratio used"}, + ) + system_prompt: str = Field( + description="System prompt used during evaluation", + sa_column=Column( + Text, nullable=False, comment="System prompt used during evaluation" + ), ) - system_prompt: str = Field(sa_column=Column(Text, nullable=False)) score: dict[str, float] | None = Field( - sa_column=Column(JSON, nullable=True), description="Evaluation scores per metric (e.g., {'mcc': 0.85})", + sa_column=Column( + JSON, nullable=True, comment="Evaluation scores per metric (e.g., MCC)" + ), ) prediction_data_s3_object: str | None = Field( default=None, description="S3 URL where the prediction data generated by the fine-tuned model is stored", + sa_column_kwargs={"comment": "S3 URL where the prediction data is stored"}, ) - status: ModelEvaluationStatus = ( - Field(default=ModelEvaluationStatus.pending, description="Evaluation status"), + status: ModelEvaluationStatus = Field( + 
default=ModelEvaluationStatus.pending, + description="Current status of the evaluation", + sa_column_kwargs={"comment": "Current status of the evaluation"}, ) error_message: str | None = Field( - default=None, description="error message for when something failed" + default=None, + description="Error message if evaluation failed", + sa_column_kwargs={"comment": "Error message if evaluation failed"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + description="Soft delete flag", + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Foreign keys + fine_tuning_id: int = Field( + foreign_key="fine_tuning.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the fine-tuning job"}, + ) + document_id: UUID = Field( + foreign_key="document.id", + nullable=False, + sa_column_kwargs={"comment": "Reference to the evaluation document"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) - is_deleted: bool = Field(default=False, nullable=False) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - deleted_at: datetime | None = Field(default=None, nullable=True) + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the evaluation was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the evaluation was last updated"}, + ) + deleted_at: datetime | None = Field( + 
default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the evaluation was deleted"}, + ) - project: "Project" = Relationship() + # Relationships + project: Project = Relationship() fine_tuning: "Fine_Tuning" = Relationship(back_populates="model_evaluation") class ModelEvaluationUpdate(SQLModel): - score: Optional[dict[str, float]] = None - status: Optional[ModelEvaluationStatus] = None - error_message: Optional[str] = None - prediction_data_s3_object: Optional[str] = None + score: dict[str, float] | None = None + status: ModelEvaluationStatus | None = None + error_message: str | None = None + prediction_data_s3_object: str | None = None class ModelEvaluationPublic(ModelEvaluationBase): diff --git a/backend/app/models/openai_conversation.py b/backend/app/models/openai_conversation.py index 6003c720..c319f9de 100644 --- a/backend/app/models/openai_conversation.py +++ b/backend/app/models/openai_conversation.py @@ -1,11 +1,12 @@ import re - from datetime import datetime -from typing import Optional + from pydantic import field_validator from sqlmodel import Field, Relationship, SQLModel from app.core.util import now +from app.models.organization import Organization +from app.models.project import Project def validate_response_id_pattern(v: str) -> str: @@ -21,32 +22,57 @@ def validate_response_id_pattern(v: str) -> str: class OpenAIConversationBase(SQLModel): # usually follow the pattern of resp_688704e41190819db512c30568xxxxxxx - response_id: str = Field(index=True, min_length=10) + response_id: str = Field( + index=True, + min_length=10, + sa_column_kwargs={"comment": "OpenAI response identifier"}, + ) ancestor_response_id: str = Field( index=True, description="Ancestor response ID for conversation threading", + sa_column_kwargs={"comment": "Root response ID for conversation threading"}, ) - previous_response_id: Optional[str] = Field( - default=None, index=True, description="Previous response ID in the conversation" + previous_response_id: 
str | None = Field( + default=None, + index=True, + description="Previous response ID in the conversation", + sa_column_kwargs={"comment": "Previous response ID in the conversation chain"}, + ) + user_question: str = Field( + description="User's question/input", + sa_column_kwargs={"comment": "User's question or input text"}, + ) + response: str | None = Field( + default=None, + description="AI response", + sa_column_kwargs={"comment": "Response generated by OpenAI"}, ) - user_question: str = Field(description="User's question/input") - response: Optional[str] = Field(default=None, description="AI response") # there are models with small name like o1 and usually fine tuned models have long names model: str = Field( - description="The model used for the response", min_length=1, max_length=150 + description="The model used for the response", + min_length=1, + max_length=150, + sa_column_kwargs={"comment": "Model used to generate the response"}, ) # usually follow the pattern of asst_WD9bumYqTtpSvxxxxx - assistant_id: Optional[str] = Field( + assistant_id: str | None = Field( default=None, description="The assistant ID used", min_length=10, max_length=50, + sa_column_kwargs={"comment": "OpenAI assistant identifier if used"}, ) project_id: int = Field( - foreign_key="project.id", nullable=False, ondelete="CASCADE" + foreign_key="project.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the project"}, ) organization_id: int = Field( - foreign_key="organization.id", nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, ) @field_validator("response_id", "ancestor_response_id", "previous_response_id") @@ -56,17 +82,43 @@ def validate_response_ids(cls, v): class OpenAIConversation(OpenAIConversationBase, table=True): + """Stores OpenAI conversation history and responses.""" + __tablename__ = "openai_conversation" - 
id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) - is_deleted: bool = Field(default=False, nullable=False) - deleted_at: Optional[datetime] = Field(default=None, nullable=True) + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the conversation record"}, + ) + is_deleted: bool = Field( + default=False, + nullable=False, + sa_column_kwargs={"comment": "Soft delete flag"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the conversation was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the conversation was last updated" + }, + ) + deleted_at: datetime | None = Field( + default=None, + nullable=True, + sa_column_kwargs={"comment": "Timestamp when the conversation was deleted"}, + ) # Relationships - project: "Project" = Relationship(back_populates="openai_conversations") - organization: "Organization" = Relationship(back_populates="openai_conversations") + project: Project = Relationship(back_populates="openai_conversations") + organization: Organization = Relationship(back_populates="openai_conversations") class OpenAIConversationCreate(SQLModel): @@ -75,17 +127,17 @@ class OpenAIConversationCreate(SQLModel): ancestor_response_id: str = Field( description="Ancestor response ID for conversation threading" ) - previous_response_id: Optional[str] = Field( + previous_response_id: str | None = Field( default=None, description="Previous response ID in the conversation" ) user_question: str = Field(description="User's question/input", min_length=1) - response: Optional[str] = Field(default=None, description="AI response") + response: str | None = Field(default=None, description="AI response") # there 
are models with small name like o1 and usually fine tuned models have long names model: str = Field( description="The model used for the response", min_length=1, max_length=150 ) # usually follow the pattern of asst_WD9bumYqTtpSvxxxxx - assistant_id: Optional[str] = Field( + assistant_id: str | None = Field( default=None, description="The assistant ID used", min_length=10, diff --git a/backend/app/models/organization.py b/backend/app/models/organization.py index db660891..0f936607 100644 --- a/backend/app/models/organization.py +++ b/backend/app/models/organization.py @@ -15,8 +15,18 @@ # Shared properties for an Organization class OrganizationBase(SQLModel): - name: str = Field(unique=True, index=True, max_length=255) - is_active: bool = True + """Base model for organizations with common data fields.""" + + name: str = Field( + unique=True, + index=True, + max_length=255, + sa_column_kwargs={"comment": "Organization name (unique identifier)"}, + ) + is_active: bool = Field( + default=True, + sa_column_kwargs={"comment": "Flag indicating if the organization is active"}, + ) # Properties to receive via API on creation @@ -32,11 +42,29 @@ class OrganizationUpdate(SQLModel): # Database model for Organization class Organization(OrganizationBase, table=True): - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + """Database model for organizations.""" + + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the organization"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the organization was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={ + "comment": "Timestamp when the organization was last updated" + }, + ) - # Relationship 
back to Creds + # Relationships creds: list["Credential"] = Relationship( back_populates="organization", cascade_delete=True ) diff --git a/backend/app/models/project.py b/backend/app/models/project.py index c0d8a87a..66111d0c 100644 --- a/backend/app/models/project.py +++ b/backend/app/models/project.py @@ -17,9 +17,22 @@ # Shared properties for a Project class ProjectBase(SQLModel): - name: str = Field(index=True, max_length=255) - description: str | None = Field(default=None, max_length=500) - is_active: bool = True + """Base model for projects with common data fields.""" + + name: str = Field( + index=True, + max_length=255, + sa_column_kwargs={"comment": "Project name"}, + ) + description: str | None = Field( + default=None, + max_length=500, + sa_column_kwargs={"comment": "Project description"}, + ) + is_active: bool = Field( + default=True, + sa_column_kwargs={"comment": "Flag indicating if the project is active"}, + ) # Properties to receive via API on creation @@ -36,18 +49,46 @@ class ProjectUpdate(SQLModel): # Database model for Project class Project(ProjectBase, table=True): + """Database model for projects.""" + __table_args__ = ( UniqueConstraint("name", "organization_id", name="uq_project_name_org_id"), ) - id: int = Field(default=None, primary_key=True) + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the project"}, + ) + storage_path: UUID = Field( + default_factory=uuid4, + nullable=False, + unique=True, + sa_column_kwargs={"comment": "Unique UUID used for cloud storage path"}, + ) + + # Foreign keys organization_id: int = Field( - foreign_key="organization.id", index=True, nullable=False, ondelete="CASCADE" + foreign_key="organization.id", + index=True, + nullable=False, + ondelete="CASCADE", + sa_column_kwargs={"comment": "Reference to the organization"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": 
"Timestamp when the project was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + nullable=False, + sa_column_kwargs={"comment": "Timestamp when the project was last updated"}, ) - storage_path: UUID = Field(default_factory=uuid4, nullable=False, unique=True) - inserted_at: datetime = Field(default_factory=now, nullable=False) - updated_at: datetime = Field(default_factory=now, nullable=False) + # Relationships creds: list["Credential"] = Relationship( back_populates="project", cascade_delete=True ) diff --git a/backend/app/models/threads.py b/backend/app/models/threads.py index e353c676..2753f72c 100644 --- a/backend/app/models/threads.py +++ b/backend/app/models/threads.py @@ -1,14 +1,16 @@ -from sqlmodel import SQLModel, Field -from typing import Optional from datetime import datetime +from sqlmodel import Field, SQLModel + +from app.core.util import now + class OpenAIThreadBase(SQLModel): thread_id: str = Field(index=True, unique=True) prompt: str - response: Optional[str] = None - status: Optional[str] = None - error: Optional[str] = None + response: str | None = None + status: str | None = None + error: str | None = None class OpenAIThreadCreate(OpenAIThreadBase): @@ -16,6 +18,43 @@ class OpenAIThreadCreate(OpenAIThreadBase): class OpenAI_Thread(OpenAIThreadBase, table=True): - id: int = Field(default=None, primary_key=True) - inserted_at: datetime = Field(default_factory=datetime.utcnow) - updated_at: datetime = Field(default_factory=datetime.utcnow) + """Stores OpenAI thread interactions and their responses.""" + + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the thread record"}, + ) + thread_id: str = Field( + index=True, + unique=True, + sa_column_kwargs={"comment": "OpenAI thread identifier"}, + ) + prompt: str = Field( + sa_column_kwargs={"comment": "User prompt sent to the thread"}, + ) + response: str | None = Field( + default=None, + sa_column_kwargs={"comment": 
"Response received from OpenAI"}, + ) + status: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Current status of the thread interaction"}, + ) + error: str | None = Field( + default=None, + sa_column_kwargs={"comment": "Error message if the interaction failed"}, + ) + + # Timestamps + inserted_at: datetime = Field( + default_factory=now, + sa_column_kwargs={"comment": "Timestamp when the record was created"}, + ) + updated_at: datetime = Field( + default_factory=now, + sa_column_kwargs={ + "comment": "Timestamp when the record was last updated", + "onupdate": now, + }, + ) diff --git a/backend/app/models/user.py b/backend/app/models/user.py index 82a98262..b3d30974 100644 --- a/backend/app/models/user.py +++ b/backend/app/models/user.py @@ -1,15 +1,32 @@ -import uuid - from pydantic import EmailStr -from sqlmodel import Field, Relationship, SQLModel +from sqlmodel import Field, SQLModel # Shared properties class UserBase(SQLModel): - email: EmailStr = Field(unique=True, index=True, max_length=255) - is_active: bool = True - is_superuser: bool = False - full_name: str | None = Field(default=None, max_length=255) + """Base model for users with common data fields.""" + + email: EmailStr = Field( + unique=True, + index=True, + max_length=255, + sa_column_kwargs={"comment": "User's email address"}, + ) + is_active: bool = Field( + default=True, + sa_column_kwargs={"comment": "Flag indicating if the user account is active"}, + ) + is_superuser: bool = Field( + default=False, + sa_column_kwargs={ + "comment": "Flag indicating if user has superuser privileges" + }, + ) + full_name: str | None = Field( + default=None, + max_length=255, + sa_column_kwargs={"comment": "User's full name"}, + ) # Properties to receive via API on creation @@ -46,8 +63,16 @@ class UpdatePassword(SQLModel): # Database model, database table inferred from class name class User(UserBase, table=True): - id: int = Field(default=None, primary_key=True) - hashed_password: str + 
"""Database model for users.""" + + id: int = Field( + default=None, + primary_key=True, + sa_column_kwargs={"comment": "Unique identifier for the user"}, + ) + hashed_password: str = Field( + sa_column_kwargs={"comment": "Bcrypt hash of the user's password"}, + ) class UserOrganization(UserBase):