project-jarvis-google
diff --git a/agent-app/app/agent.py b/agent-app/app/agent.py
Lines changed: 1 addition & 1 deletion
diff --git a/agent-app/app/sub_agents/data_model_discovery_agent/__init__.py b/agent-app/app/sub_agents/data_model_discovery_agent/__init__.py
Lines changed: 1 addition & 1 deletion
diff --git a/agent-app/app/sub_agents/data_model_discovery_agent/agent.py b/agent-app/app/sub_agents/data_model_discovery_agent/agent.py
Lines changed: 25 additions & 8 deletions
diff --git a/agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/agent.py b/agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/agent.py
Lines changed: 3 additions & 3 deletions
diff --git a/agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/tools.py b/agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/tools.py
Lines changed: 50 additions & 19 deletions
diff --git a/agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/utils/mssql_profiling_utils.py b/agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/utils/mssql_profiling_utils.py
Lines changed: 49 additions & 19 deletions
@@ -68,6 +68,6 @@
         capability_mapper_agent,
         strategy_recommender_agent,
         detailed_architecture_design_agent,
-        data_model_discovery_agent
+        data_model_discovery_agent,
     ],
 )
@@ -1 +1 @@
-from .agent import data_model_discovery_agent
+from .agent import data_model_discovery_agent
@@ -12,6 +12,7 @@
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
+
 def root_agent_instruction(ctx: ReadonlyContext) -> str:
     """Dynamically builds the Root Agent's instruction based on session state."""
     selected_schema = ctx.state.get("selected_schema")
@@ -110,7 +111,9 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
     """
 
     if not db_connection or db_connection.get("status") != "connected":
-        return base_instruction + """
+        return (
+            base_instruction
+            + """
         **Current State:** No active database connection.
 
         **Your Task:**
@@ -140,18 +143,24 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
             - Answer questions about your data and schema structure
           To do any of this, I'll first need to connect to your database. Just let me know when you want to proceed!"
         """
+        )
     elif available_schemas and not selected_schema:
-        return base_instruction + """
+        return (
+            base_instruction
+            + """
     **Current Task:** The user has been presented with a list of available schemas by the `database_cred_agent`. Their current input is expected to be the name of the schema they wish to analyze.
 
     1.  Consider the user's entire input as the desired schema name.
     2.  You MUST call the `schema_introspection_agent`. Pass the user's input as the primary query to this sub-agent. The `schema_introspection_agent` is designed to take this input as the schema name for its operations.
         - Example AgentTool Call: `schema_introspection_agent(user_input)`
     3.  The `schema_introspection_agent` will handle storing the selected schema and fetching the details. Await its response.
         """
+        )
     elif selected_schema and schema_structure:
         profile_status = "Completed" if data_profile else "Not Yet Run"
-        return base_instruction + f"""
+        return (
+            base_instruction
+            + f"""
     **Current Context:** The database is connected. The schema '{selected_schema}' has been successfully introspected.
     Data Quality Profile Status: {profile_status}
 
@@ -171,20 +180,28 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
 
     If the user's intent is unclear, ask for clarification. You can remind them of the available actions.
         """
+        )
     elif selected_schema and not schema_structure:
-         return base_instruction + f"""
+        return (
+            base_instruction
+            + f"""
     **Current Context:** The schema '{selected_schema}' was selected, but the introspection data is missing or incomplete.
     - Recall `schema_introspection_agent` and pass the schema name '{selected_schema}' as the input to it to ensure the structure is loaded.
     - Example AgentTool Call: `schema_introspection_agent("{selected_schema}")`
          """
+        )
     else:
-        return base_instruction + """
+        return (
+            base_instruction
+            + """
     **Current Task:** Determine the next step based on the conversation history and session state. If unsure, ask the user for clarification.
         """
+        )
+
 
 data_model_discovery_agent = LlmAgent(
-    model='gemini-2.5-flash',
-    name='data_model_discovery_agent',
+    model="gemini-2.5-flash",
+    name="data_model_discovery_agent",
     description=(
         "A helpful root agent that orchestrates sub-agents to introspect and profile legacy databases."
     ),
@@ -195,5 +212,5 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
         qa_agent,
         data_profiling_agent,
         reporting_agent,
-    ]
+    ],
 )
@@ -3,9 +3,9 @@
 from ..qa_agent.agent import qa_agent
 
 data_profiling_agent = LlmAgent(
-    model='gemini-2.5-flash',
-    name='data_profiling_agent',
-    description='Profiles data quality for the selected schema and then calls QA agent to summarize.',
+    model="gemini-2.5-flash",
+    name="data_profiling_agent",
+    description="Profiles data quality for the selected schema and then calls QA agent to summarize.",
     instruction="""
     ### Role
     You are a **Data Profiling Agent**. Your sole responsibility is to run data profiling on a schema and then immediately hand off the summary of findings to the QA agent for user-facing reporting.  
 
@@ -4,29 +4,43 @@
 import psycopg2
 import mysql.connector
 import pyodbc
-from .utils import postgres_profiling_utils, mysql_profiling_utils, mssql_profiling_utils
+from .utils import (
+    postgres_profiling_utils,
+    mysql_profiling_utils,
+    mssql_profiling_utils,
+)
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
 
+
 def _get_db_connection(metadata: Dict[str, Any], password: str) -> Any:
     db_type = metadata.get("db_type")
     host = metadata.get("host")
     port = int(metadata.get("port"))
     dbname = metadata.get("dbname")
     user = metadata.get("user")
-    logger.info(f"Attempting to connect to {db_type} at {host}:{port} as {user} to database {dbname}")
+    logger.info(
+        f"Attempting to connect to {db_type} at {host}:{port} as {user} to database {dbname}"
+    )
     if db_type == "postgresql":
-        return psycopg2.connect(host=host, port=port, dbname=dbname, user=user, password=password)
+        return psycopg2.connect(
+            host=host, port=port, dbname=dbname, user=user, password=password
+        )
     elif db_type == "mysql":
-        return mysql.connector.connect(host=host, port=port, database=dbname, user=user, password=password)
+        return mysql.connector.connect(
+            host=host, port=port, database=dbname, user=user, password=password
+        )
     elif db_type == "mssql":
         conn_str = f"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={host},{port};DATABASE={dbname};UID={user};PWD={password}"
         return pyodbc.connect(conn_str)
     else:
         raise ValueError(f"Unsupported database type: {db_type}")
 
-async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -> Dict[str, Any]:
+
+async def profile_schema_data(
+    tool_context: ToolContext, args: Dict[str, Any]
+) -> Dict[str, Any]:
     """
     Profiles the data in the selected schema based on the schema structure.
     Calculates nullability, cardinality, orphan records, and type anomalies.
@@ -41,10 +55,14 @@ async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -
     schema_structure = tool_context.state.get("schema_structure")
     sample_size = args.get("sample_size", 10000)
 
-    if not db_conn_state or db_conn_state.get("status") != "connected": return {"error": "DB not connected."}
-    if not db_creds: return {"error": "DB credentials not found."}
-    if not schema_name: return {"error": "Selected schema not found."}
-    if not schema_structure: return {"error": "Schema structure not found. Please run introspection first."}
+    if not db_conn_state or db_conn_state.get("status") != "connected":
+        return {"error": "DB not connected."}
+    if not db_creds:
+        return {"error": "DB credentials not found."}
+    if not schema_name:
+        return {"error": "Selected schema not found."}
+    if not schema_structure:
+        return {"error": "Schema structure not found. Please run introspection first."}
 
     metadata = db_conn_state["metadata"]
     password = db_creds["password"]
@@ -53,20 +71,30 @@ async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -
     conn = None
     try:
         conn = _get_db_connection(metadata, password)
-        logger.info(f"Reconnected to {db_type} for data profiling of schema '{schema_name}'.")
+        logger.info(
+            f"Reconnected to {db_type} for data profiling of schema '{schema_name}'."
+        )
 
         if db_type == "postgresql":
-            profile_results = postgres_profiling_utils.profile_postgres_data(conn, schema_name, schema_structure, sample_size)
+            profile_results = postgres_profiling_utils.profile_postgres_data(
+                conn, schema_name, schema_structure, sample_size
+            )
         elif db_type == "mysql":
-            profile_results = mysql_profiling_utils.profile_mysql_data(conn, schema_name, schema_structure, sample_size)
+            profile_results = mysql_profiling_utils.profile_mysql_data(
+                conn, schema_name, schema_structure, sample_size
+            )
         elif db_type == "mssql":
-            profile_results = mssql_profiling_utils.profile_mssql_data(conn, schema_name, schema_structure, sample_size)
+            profile_results = mssql_profiling_utils.profile_mssql_data(
+                conn, schema_name, schema_structure, sample_size
+            )
         else:
             return {"error": f"Profiling for {db_type} not implemented."}
 
         tool_context.state["data_profile"] = profile_results
-        tool_context.state["profiling_just_completed"] = True # Set the flag
-        logger.info(f"Data profiling results for '{schema_name}' saved to session state.")
+        tool_context.state["profiling_just_completed"] = True  # Set the flag
+        logger.info(
+            f"Data profiling results for '{schema_name}' saved to session state."
+        )
 
         return {
             "status": "success",
@@ -75,9 +103,12 @@ async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -
         }
     except Exception as e:
         logger.error(f"Error during data profiling: {e}", exc_info=True)
-        return {"error": f"Failed to profile data for {db_type} ({schema_name}): {str(e)}"}
+        return {
+            "error": f"Failed to profile data for {db_type} ({schema_name}): {str(e)}"
+        }
     finally:
         if conn:
-            try: conn.close()
-            except Exception as e: logger.error(f"Error closing {db_type} connection: {e}")
-            
+            try:
+                conn.close()
+            except Exception as e:
+                logger.error(f"Error closing {db_type} connection: {e}")
@@ -5,6 +5,7 @@
 
 logger = logging.getLogger(__name__)
 
+
 def _execute_query(conn: Any, query: str) -> List[Dict[str, Any]]:
     """Executes a SQL query and returns results as a list of dicts for SQL Server."""
     cursor = conn.cursor()
@@ -18,8 +19,19 @@ def _execute_query(conn: Any, query: str) -> List[Dict[str, Any]]:
     finally:
         cursor.close()
 
-def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str, Any], sample_size: int = 10000) -> Dict[str, Any]:
-    profile_results = {"nullability": {}, "cardinality": {}, "orphan_records": {}, "type_anomalies": {}}
+
+def profile_mssql_data(
+    conn: Any,
+    schema_name: str,
+    schema_structure: Dict[str, Any],
+    sample_size: int = 10000,
+) -> Dict[str, Any]:
+    profile_results = {
+        "nullability": {},
+        "cardinality": {},
+        "orphan_records": {},
+        "type_anomalies": {},
+    }
     tables = schema_structure.get("tables", {})
 
     for table_name, table_info in tables.items():
@@ -37,30 +49,38 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
             """
             try:
                 res = _execute_query(conn, null_q)[0]
-                total_count = int(res['total_count'])
-                null_count = int(res['null_count'])
+                total_count = int(res["total_count"])
+                null_count = int(res["null_count"])
                 null_pct = (null_count / total_count) * 100 if total_count > 0 else 0
-                profile_results["nullability"][table_name][col_name] = round(null_pct, 2)
+                profile_results["nullability"][table_name][col_name] = round(
+                    null_pct, 2
+                )
             except Exception as e:
-                logger.error(f"Error profiling nulls for {full_table_name}.[{col_name}]: {e}")
+                logger.error(
+                    f"Error profiling nulls for {full_table_name}.[{col_name}]: {e}"
+                )
                 profile_results["nullability"][table_name][col_name] = "Error"
 
         key_columns = set()
         for const in table_info.get("constraints", []):
             if const.get("type") in ("PRIMARY KEY", "UNIQUE") and const.get("columns"):
-                 key_columns.add(const["columns"])
+                key_columns.add(const["columns"])
         for fk in schema_structure.get("foreign_keys", []):
             if fk.get("from_table") == table_name and fk.get("from_column"):
                 key_columns.add(fk["from_column"])
 
         for col_name in key_columns:
-             if col_name in table_info.get("columns", {}):
+            if col_name in table_info.get("columns", {}):
                 card_q = f"SELECT COUNT(DISTINCT [{col_name}]) as unique_count FROM {full_table_name};"
                 try:
                     res = _execute_query(conn, card_q)[0]
-                    profile_results["cardinality"][table_name][col_name] = int(res['unique_count'])
+                    profile_results["cardinality"][table_name][col_name] = int(
+                        res["unique_count"]
+                    )
                 except Exception as e:
-                    logger.error(f"Error profiling cardinality for {full_table_name}.[{col_name}]: {e}")
+                    logger.error(
+                        f"Error profiling cardinality for {full_table_name}.[{col_name}]: {e}"
+                    )
                     profile_results["cardinality"][table_name][col_name] = "Error"
 
     for fk in schema_structure.get("foreign_keys", []):
@@ -81,9 +101,11 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
             """
             try:
                 res = _execute_query(conn, orphan_q)[0]
-                total_fk_values = int(res['total_fk_values'])
-                orphan_count = int(res['orphan_count'])
-                orphan_pct = (orphan_count / total_fk_values) * 100 if total_fk_values > 0 else 0
+                total_fk_values = int(res["total_fk_values"])
+                orphan_count = int(res["orphan_count"])
+                orphan_pct = (
+                    (orphan_count / total_fk_values) * 100 if total_fk_values > 0 else 0
+                )
                 profile_results["orphan_records"][fk_name] = round(orphan_pct, 2)
             except Exception as e:
                 logger.error(f"Error checking orphans for {fk_name}: {e}")
@@ -92,9 +114,13 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
     for table_name, table_info in tables.items():
         full_table_name = f"[{schema_name}].[{table_name}]"
         for col_name, col_info in table_info.get("columns", {}).items():
-             col_type = col_info.get("type", "").lower()
-             if "char" in col_type or "text" in col_type or "varchar" in col_type:
-                if "phone" in col_name.lower() or "zip" in col_name.lower() or "postal" in col_name.lower():
+            col_type = col_info.get("type", "").lower()
+            if "char" in col_type or "text" in col_type or "varchar" in col_type:
+                if (
+                    "phone" in col_name.lower()
+                    or "zip" in col_name.lower()
+                    or "postal" in col_name.lower()
+                ):
                     # Regex for anything not a digit, hyphen, or period
                     anomaly_q = f"""
                     SELECT COUNT_BIG(*) as non_numeric_count
@@ -103,12 +129,16 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
                     """
                     try:
                         res = _execute_query(conn, anomaly_q)[0]
-                        non_numeric_count = int(res['non_numeric_count'])
+                        non_numeric_count = int(res["non_numeric_count"])
                         if non_numeric_count > 0:
                             key = f"{table_name}.{col_name}"
                             if key not in profile_results["type_anomalies"]:
                                 profile_results["type_anomalies"][key] = []
-                            profile_results["type_anomalies"][key].append(f"Found {non_numeric_count} rows with non-numeric characters in sample.")
+                            profile_results["type_anomalies"][key].append(
+                                f"Found {non_numeric_count} rows with non-numeric characters in sample."
+                            )
                     except Exception as e:
-                         logger.warning(f"Error checking type anomaly for {full_table_name}.[{col_name}]: {e}")
+                        logger.warning(
+                            f"Error checking type anomaly for {full_table_name}.[{col_name}]: {e}"
+                        )
     return profile_results
Original file line number	Diff line number	Diff line change
`@@ -68,6 +68,6 @@`
`68`	`68`	`capability_mapper_agent,`
`69`	`69`	`strategy_recommender_agent,`
`70`	`70`	`detailed_architecture_design_agent,`
`71`		`- data_model_discovery_agent`
	`71`	`+ data_model_discovery_agent,`
`72`	`72`	`],`
`73`	`73`	`)`
Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-from .agent import data_model_discovery_agent`
	`1`	`+from .agent import data_model_discovery_agent`