Skip to content

Commit c4354cd

Browse files
feat(data-model-discovery-agent): fix linting issue
1 parent f4b563e commit c4354cd

File tree

18 files changed

+634
-241
lines changed

18 files changed

+634
-241
lines changed

agent-app/app/agent.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,6 @@
6868
capability_mapper_agent,
6969
strategy_recommender_agent,
7070
detailed_architecture_design_agent,
71-
data_model_discovery_agent
71+
data_model_discovery_agent,
7272
],
7373
)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
from .agent import data_model_discovery_agent
1+
from .agent import data_model_discovery_agent

agent-app/app/sub_agents/data_model_discovery_agent/agent.py

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
logger = logging.getLogger(__name__)
1313
logging.basicConfig(level=logging.INFO)
1414

15+
1516
def root_agent_instruction(ctx: ReadonlyContext) -> str:
1617
"""Dynamically builds the Root Agent's instruction based on session state."""
1718
selected_schema = ctx.state.get("selected_schema")
@@ -110,7 +111,9 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
110111
"""
111112

112113
if not db_connection or db_connection.get("status") != "connected":
113-
return base_instruction + """
114+
return (
115+
base_instruction
116+
+ """
114117
**Current State:** No active database connection.
115118
116119
**Your Task:**
@@ -140,18 +143,24 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
140143
- Answer questions about your data and schema structure
141144
To do any of this, I'll first need to connect to your database. Just let me know when you want to proceed!"
142145
"""
146+
)
143147
elif available_schemas and not selected_schema:
144-
return base_instruction + """
148+
return (
149+
base_instruction
150+
+ """
145151
**Current Task:** The user has been presented with a list of available schemas by the `database_cred_agent`. Their current input is expected to be the name of the schema they wish to analyze.
146152
147153
1. Consider the user's entire input as the desired schema name.
148154
2. You MUST call the `schema_introspection_agent`. Pass the user's input as the primary query to this sub-agent. The `schema_introspection_agent` is designed to take this input as the schema name for its operations.
149155
- Example AgentTool Call: `schema_introspection_agent(user_input)`
150156
3. The `schema_introspection_agent` will handle storing the selected schema and fetching the details. Await its response.
151157
"""
158+
)
152159
elif selected_schema and schema_structure:
153160
profile_status = "Completed" if data_profile else "Not Yet Run"
154-
return base_instruction + f"""
161+
return (
162+
base_instruction
163+
+ f"""
155164
**Current Context:** The database is connected. The schema '{selected_schema}' has been successfully introspected.
156165
Data Quality Profile Status: {profile_status}
157166
@@ -171,20 +180,28 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
171180
172181
If the user's intent is unclear, ask for clarification. You can remind them of the available actions.
173182
"""
183+
)
174184
elif selected_schema and not schema_structure:
175-
return base_instruction + f"""
185+
return (
186+
base_instruction
187+
+ f"""
176188
**Current Context:** The schema '{selected_schema}' was selected, but the introspection data is missing or incomplete.
177189
- Recall `schema_introspection_agent` and pass the schema name '{selected_schema}' as the input to it to ensure the structure is loaded.
178190
- Example AgentTool Call: `schema_introspection_agent("{selected_schema}")`
179191
"""
192+
)
180193
else:
181-
return base_instruction + """
194+
return (
195+
base_instruction
196+
+ """
182197
**Current Task:** Determine the next step based on the conversation history and session state. If unsure, ask the user for clarification.
183198
"""
199+
)
200+
184201

185202
data_model_discovery_agent = LlmAgent(
186-
model='gemini-2.5-flash',
187-
name='data_model_discovery_agent',
203+
model="gemini-2.5-flash",
204+
name="data_model_discovery_agent",
188205
description=(
189206
"A helpful root agent that orchestrates sub-agents to introspect and profile legacy databases."
190207
),
@@ -195,5 +212,5 @@ def root_agent_instruction(ctx: ReadonlyContext) -> str:
195212
qa_agent,
196213
data_profiling_agent,
197214
reporting_agent,
198-
]
215+
],
199216
)

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/agent.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@
33
from ..qa_agent.agent import qa_agent
44

55
data_profiling_agent = LlmAgent(
6-
model='gemini-2.5-flash',
7-
name='data_profiling_agent',
8-
description='Profiles data quality for the selected schema and then calls QA agent to summarize.',
6+
model="gemini-2.5-flash",
7+
name="data_profiling_agent",
8+
description="Profiles data quality for the selected schema and then calls QA agent to summarize.",
99
instruction="""
1010
### Role
1111
You are a **Data Profiling Agent**. Your sole responsibility is to run data profiling on a schema and then immediately hand off the summary of findings to the QA agent for user-facing reporting.

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/tools.py

Lines changed: 50 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,43 @@
44
import psycopg2
55
import mysql.connector
66
import pyodbc
7-
from .utils import postgres_profiling_utils, mysql_profiling_utils, mssql_profiling_utils
7+
from .utils import (
8+
postgres_profiling_utils,
9+
mysql_profiling_utils,
10+
mssql_profiling_utils,
11+
)
812

913
logger = logging.getLogger(__name__)
1014
logging.basicConfig(level=logging.INFO)
1115

16+
1217
def _get_db_connection(metadata: Dict[str, Any], password: str) -> Any:
1318
db_type = metadata.get("db_type")
1419
host = metadata.get("host")
1520
port = int(metadata.get("port"))
1621
dbname = metadata.get("dbname")
1722
user = metadata.get("user")
18-
logger.info(f"Attempting to connect to {db_type} at {host}:{port} as {user} to database {dbname}")
23+
logger.info(
24+
f"Attempting to connect to {db_type} at {host}:{port} as {user} to database {dbname}"
25+
)
1926
if db_type == "postgresql":
20-
return psycopg2.connect(host=host, port=port, dbname=dbname, user=user, password=password)
27+
return psycopg2.connect(
28+
host=host, port=port, dbname=dbname, user=user, password=password
29+
)
2130
elif db_type == "mysql":
22-
return mysql.connector.connect(host=host, port=port, database=dbname, user=user, password=password)
31+
return mysql.connector.connect(
32+
host=host, port=port, database=dbname, user=user, password=password
33+
)
2334
elif db_type == "mssql":
2435
conn_str = f"DRIVER={{ODBC Driver 17 for SQL Server}};SERVER={host},{port};DATABASE={dbname};UID={user};PWD={password}"
2536
return pyodbc.connect(conn_str)
2637
else:
2738
raise ValueError(f"Unsupported database type: {db_type}")
2839

29-
async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -> Dict[str, Any]:
40+
41+
async def profile_schema_data(
42+
tool_context: ToolContext, args: Dict[str, Any]
43+
) -> Dict[str, Any]:
3044
"""
3145
Profiles the data in the selected schema based on the schema structure.
3246
Calculates nullability, cardinality, orphan records, and type anomalies.
@@ -41,10 +55,14 @@ async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -
4155
schema_structure = tool_context.state.get("schema_structure")
4256
sample_size = args.get("sample_size", 10000)
4357

44-
if not db_conn_state or db_conn_state.get("status") != "connected": return {"error": "DB not connected."}
45-
if not db_creds: return {"error": "DB credentials not found."}
46-
if not schema_name: return {"error": "Selected schema not found."}
47-
if not schema_structure: return {"error": "Schema structure not found. Please run introspection first."}
58+
if not db_conn_state or db_conn_state.get("status") != "connected":
59+
return {"error": "DB not connected."}
60+
if not db_creds:
61+
return {"error": "DB credentials not found."}
62+
if not schema_name:
63+
return {"error": "Selected schema not found."}
64+
if not schema_structure:
65+
return {"error": "Schema structure not found. Please run introspection first."}
4866

4967
metadata = db_conn_state["metadata"]
5068
password = db_creds["password"]
@@ -53,20 +71,30 @@ async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -
5371
conn = None
5472
try:
5573
conn = _get_db_connection(metadata, password)
56-
logger.info(f"Reconnected to {db_type} for data profiling of schema '{schema_name}'.")
74+
logger.info(
75+
f"Reconnected to {db_type} for data profiling of schema '{schema_name}'."
76+
)
5777

5878
if db_type == "postgresql":
59-
profile_results = postgres_profiling_utils.profile_postgres_data(conn, schema_name, schema_structure, sample_size)
79+
profile_results = postgres_profiling_utils.profile_postgres_data(
80+
conn, schema_name, schema_structure, sample_size
81+
)
6082
elif db_type == "mysql":
61-
profile_results = mysql_profiling_utils.profile_mysql_data(conn, schema_name, schema_structure, sample_size)
83+
profile_results = mysql_profiling_utils.profile_mysql_data(
84+
conn, schema_name, schema_structure, sample_size
85+
)
6286
elif db_type == "mssql":
63-
profile_results = mssql_profiling_utils.profile_mssql_data(conn, schema_name, schema_structure, sample_size)
87+
profile_results = mssql_profiling_utils.profile_mssql_data(
88+
conn, schema_name, schema_structure, sample_size
89+
)
6490
else:
6591
return {"error": f"Profiling for {db_type} not implemented."}
6692

6793
tool_context.state["data_profile"] = profile_results
68-
tool_context.state["profiling_just_completed"] = True # Set the flag
69-
logger.info(f"Data profiling results for '{schema_name}' saved to session state.")
94+
tool_context.state["profiling_just_completed"] = True # Set the flag
95+
logger.info(
96+
f"Data profiling results for '{schema_name}' saved to session state."
97+
)
7098

7199
return {
72100
"status": "success",
@@ -75,9 +103,12 @@ async def profile_schema_data(tool_context: ToolContext, args: Dict[str, Any]) -
75103
}
76104
except Exception as e:
77105
logger.error(f"Error during data profiling: {e}", exc_info=True)
78-
return {"error": f"Failed to profile data for {db_type} ({schema_name}): {str(e)}"}
106+
return {
107+
"error": f"Failed to profile data for {db_type} ({schema_name}): {str(e)}"
108+
}
79109
finally:
80110
if conn:
81-
try: conn.close()
82-
except Exception as e: logger.error(f"Error closing {db_type} connection: {e}")
83-
111+
try:
112+
conn.close()
113+
except Exception as e:
114+
logger.error(f"Error closing {db_type} connection: {e}")

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/utils/mssql_profiling_utils.py

Lines changed: 49 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
logger = logging.getLogger(__name__)
77

8+
89
def _execute_query(conn: Any, query: str) -> List[Dict[str, Any]]:
910
"""Executes a SQL query and returns results as a list of dicts for SQL Server."""
1011
cursor = conn.cursor()
@@ -18,8 +19,19 @@ def _execute_query(conn: Any, query: str) -> List[Dict[str, Any]]:
1819
finally:
1920
cursor.close()
2021

21-
def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str, Any], sample_size: int = 10000) -> Dict[str, Any]:
22-
profile_results = {"nullability": {}, "cardinality": {}, "orphan_records": {}, "type_anomalies": {}}
22+
23+
def profile_mssql_data(
24+
conn: Any,
25+
schema_name: str,
26+
schema_structure: Dict[str, Any],
27+
sample_size: int = 10000,
28+
) -> Dict[str, Any]:
29+
profile_results = {
30+
"nullability": {},
31+
"cardinality": {},
32+
"orphan_records": {},
33+
"type_anomalies": {},
34+
}
2335
tables = schema_structure.get("tables", {})
2436

2537
for table_name, table_info in tables.items():
@@ -37,30 +49,38 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
3749
"""
3850
try:
3951
res = _execute_query(conn, null_q)[0]
40-
total_count = int(res['total_count'])
41-
null_count = int(res['null_count'])
52+
total_count = int(res["total_count"])
53+
null_count = int(res["null_count"])
4254
null_pct = (null_count / total_count) * 100 if total_count > 0 else 0
43-
profile_results["nullability"][table_name][col_name] = round(null_pct, 2)
55+
profile_results["nullability"][table_name][col_name] = round(
56+
null_pct, 2
57+
)
4458
except Exception as e:
45-
logger.error(f"Error profiling nulls for {full_table_name}.[{col_name}]: {e}")
59+
logger.error(
60+
f"Error profiling nulls for {full_table_name}.[{col_name}]: {e}"
61+
)
4662
profile_results["nullability"][table_name][col_name] = "Error"
4763

4864
key_columns = set()
4965
for const in table_info.get("constraints", []):
5066
if const.get("type") in ("PRIMARY KEY", "UNIQUE") and const.get("columns"):
51-
key_columns.add(const["columns"])
67+
key_columns.add(const["columns"])
5268
for fk in schema_structure.get("foreign_keys", []):
5369
if fk.get("from_table") == table_name and fk.get("from_column"):
5470
key_columns.add(fk["from_column"])
5571

5672
for col_name in key_columns:
57-
if col_name in table_info.get("columns", {}):
73+
if col_name in table_info.get("columns", {}):
5874
card_q = f"SELECT COUNT(DISTINCT [{col_name}]) as unique_count FROM {full_table_name};"
5975
try:
6076
res = _execute_query(conn, card_q)[0]
61-
profile_results["cardinality"][table_name][col_name] = int(res['unique_count'])
77+
profile_results["cardinality"][table_name][col_name] = int(
78+
res["unique_count"]
79+
)
6280
except Exception as e:
63-
logger.error(f"Error profiling cardinality for {full_table_name}.[{col_name}]: {e}")
81+
logger.error(
82+
f"Error profiling cardinality for {full_table_name}.[{col_name}]: {e}"
83+
)
6484
profile_results["cardinality"][table_name][col_name] = "Error"
6585

6686
for fk in schema_structure.get("foreign_keys", []):
@@ -81,9 +101,11 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
81101
"""
82102
try:
83103
res = _execute_query(conn, orphan_q)[0]
84-
total_fk_values = int(res['total_fk_values'])
85-
orphan_count = int(res['orphan_count'])
86-
orphan_pct = (orphan_count / total_fk_values) * 100 if total_fk_values > 0 else 0
104+
total_fk_values = int(res["total_fk_values"])
105+
orphan_count = int(res["orphan_count"])
106+
orphan_pct = (
107+
(orphan_count / total_fk_values) * 100 if total_fk_values > 0 else 0
108+
)
87109
profile_results["orphan_records"][fk_name] = round(orphan_pct, 2)
88110
except Exception as e:
89111
logger.error(f"Error checking orphans for {fk_name}: {e}")
@@ -92,9 +114,13 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
92114
for table_name, table_info in tables.items():
93115
full_table_name = f"[{schema_name}].[{table_name}]"
94116
for col_name, col_info in table_info.get("columns", {}).items():
95-
col_type = col_info.get("type", "").lower()
96-
if "char" in col_type or "text" in col_type or "varchar" in col_type:
97-
if "phone" in col_name.lower() or "zip" in col_name.lower() or "postal" in col_name.lower():
117+
col_type = col_info.get("type", "").lower()
118+
if "char" in col_type or "text" in col_type or "varchar" in col_type:
119+
if (
120+
"phone" in col_name.lower()
121+
or "zip" in col_name.lower()
122+
or "postal" in col_name.lower()
123+
):
98124
# Regex for anything not a digit, hyphen, or period
99125
anomaly_q = f"""
100126
SELECT COUNT_BIG(*) as non_numeric_count
@@ -103,12 +129,16 @@ def profile_mssql_data(conn: Any, schema_name: str, schema_structure: Dict[str,
103129
"""
104130
try:
105131
res = _execute_query(conn, anomaly_q)[0]
106-
non_numeric_count = int(res['non_numeric_count'])
132+
non_numeric_count = int(res["non_numeric_count"])
107133
if non_numeric_count > 0:
108134
key = f"{table_name}.{col_name}"
109135
if key not in profile_results["type_anomalies"]:
110136
profile_results["type_anomalies"][key] = []
111-
profile_results["type_anomalies"][key].append(f"Found {non_numeric_count} rows with non-numeric characters in sample.")
137+
profile_results["type_anomalies"][key].append(
138+
f"Found {non_numeric_count} rows with non-numeric characters in sample."
139+
)
112140
except Exception as e:
113-
logger.warning(f"Error checking type anomaly for {full_table_name}.[{col_name}]: {e}")
141+
logger.warning(
142+
f"Error checking type anomaly for {full_table_name}.[{col_name}]: {e}"
143+
)
114144
return profile_results

0 commit comments

Comments
 (0)