
Commit fdc59c3

sanughosh-google authored and arjunvijaygoogle committed

feat(data-model-discovery-agent): fix ruff issues

1 parent 3d8311e · commit fdc59c3

11 files changed (+152, −100 lines)

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/tools.py

Lines changed: 2 additions & 3 deletions

@@ -19,7 +19,8 @@
 def _get_db_connection(metadata: dict[str, Any], password: str) -> Any:
     db_type = metadata.get("db_type")
     host = metadata.get("host")
-    port = int(metadata.get("port"))
+    port_value = metadata.get("port")
+    port = int(port_value) if port_value is not None else None
     dbname = metadata.get("dbname")
     user = metadata.get("user")
     logger.info(
@@ -48,8 +49,6 @@ async def profile_schema_data(
     Calculates nullability, cardinality, orphan records, and type anomalies.
     Sets a flag on successful completion.
     """
-    if not isinstance(args, dict):
-        return {"error": "Invalid arguments. Expected a dictionary for args."}

     db_conn_state = tool_context.state.get("db_connection")
     db_creds = tool_context.state.get("db_creds_temp")
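The port change above is the behavioral fix in this hunk: metadata.get("port") returns None when the key is absent, and int(None) raises a TypeError before any useful error handling can run. A minimal sketch of the guarded conversion (the parse_port helper name is hypothetical, for illustration only):

    from typing import Any

    def parse_port(metadata: dict[str, Any]) -> int | None:
        # int(None) raises TypeError, so convert only when a value exists.
        port_value = metadata.get("port")
        return int(port_value) if port_value is not None else None

    assert parse_port({"port": "5432"}) == 5432  # numeric strings coerce cleanly
    assert parse_port({}) is None                # a missing key no longer crashes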

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/utils/mssql_profiling_utils.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ def profile_mssql_data(
     schema_structure: dict[str, Any],
     sample_size: int = 10000,
 ) -> dict[str, Any]:
-    profile_results = {
+    profile_results: dict[str, Any] = {
         "nullability": {},
         "cardinality": {},
         "orphan_records": {},

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/utils/mysql_profiling_utils.py

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ def profile_mysql_data(
         logger.error(f"Failed to set database {schema_name}: {e}")
         raise

-    profile_results = {
+    profile_results: dict[str, Any] = {
         "nullability": {},
         "cardinality": {},
         "orphan_records": {},

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/data_profiling_agent/utils/postgres_profiling_utils.py

Lines changed: 1 addition & 1 deletion

@@ -25,7 +25,7 @@ def profile_postgres_data(
     schema_structure: dict[str, Any],
     sample_size: int = 10000,
 ) -> dict[str, Any]:
-    profile_results = {
+    profile_results: dict[str, dict] = {
        "nullability": {},
        "cardinality": {},
        "orphan_records": {},

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/database_cred_agent/tools.py

Lines changed: 5 additions & 5 deletions

@@ -121,11 +121,11 @@ async def validate_db_connection(

     # Clear any previous connection state
     if "db_connection" in tool_context.state:
-        del tool_context.state["db_connection"]
+        tool_context.state["db_connection"] = None
     if "db_creds_temp" in tool_context.state:
-        del tool_context.state["db_creds_temp"]
+        tool_context.state["db_creds_temp"] = None
     if "selected_schema" in tool_context.state:
-        del tool_context.state["selected_schema"]
+        tool_context.state["selected_schema"] = None

     tool_context.state["db_connection"] = {
         "metadata": {
@@ -151,9 +151,9 @@ async def validate_db_connection(
     except Exception as e:
         logger.error(f"Database connection or schema fetch failed for {db_type}: {e}")
         if "db_connection" in tool_context.state:
-            del tool_context.state["db_connection"]
+            tool_context.state["db_connection"] = None
         if "db_creds_temp" in tool_context.state:
-            del tool_context.state["db_creds_temp"]
+            tool_context.state["db_creds_temp"] = None
         return {
             "status": "error",
             "message": f"Connection/Schema fetch failed for {db_type}: {e}",

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/reporting_agent/tools.py

Lines changed: 0 additions & 13 deletions

@@ -28,8 +28,6 @@ async def generate_summary_report(
     - report_text: The markdown formatted summary report (on success).
     - error: An error message (on failure).
     """
-    if not isinstance(args, dict):
-        return {"error": "Invalid arguments. Expected a dictionary for args."}

     schema_structure = tool_context.state.get("schema_structure")
     data_profile = tool_context.state.get("data_profile")
@@ -108,11 +106,6 @@ async def export_full_report(tool_context: ToolContext, args: dict) -> dict:
         "error": Optional error message
     }
     """
-    if not isinstance(args, dict):
-        return {
-            "status": "error",
-            "error": "Invalid arguments. Expected a dictionary for args.",
-        }

     schema_structure = tool_context.state.get("schema_structure")
     data_profile = tool_context.state.get("data_profile")
@@ -193,12 +186,6 @@ async def generate_erd_script(
     }
     """

-    if not isinstance(args, dict):
-        return {
-            "status": "error",
-            "error": "Invalid arguments. Expected a dictionary for args.",
-        }
-
     schema_structure = tool_context.state.get("schema_structure")
     if not schema_structure:
         return {

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/schema_introspection_agent/tools.py

Lines changed: 2 additions & 2 deletions

@@ -26,7 +26,7 @@ def _get_db_connection(metadata: dict[str, Any], password: str) -> Any:
         raise ValueError(
             "Missing one or more required connection parameters in metadata or password."
         )
-    port = int(port)
+    port = int(port)  # type: ignore[arg-type]
     logger.info(
         f"Attempting to connect to {db_type} at {host}:{port} as {user} to database {dbname}"
     )
@@ -86,7 +86,7 @@ async def get_schema_details(

     tool_context.state["selected_schema"] = schema_name
     if "available_schemas" in tool_context.state:
-        del tool_context.state["available_schemas"]
+        tool_context.state["available_schemas"] = None

     metadata = db_conn_state["metadata"]
     password = db_creds["password"]
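The type: ignore[arg-type] added above is scoped to a single error code, so any other diagnostic on the same line still surfaces; a bare "# type: ignore" would hide them all. A hedged sketch of why the ignore is needed at all (mypy-style codes assumed; the combined guard shape is inferred from the error message, not confirmed from the file):

    def to_port(value: str | None, host: str | None = "localhost") -> int:
        # A combined guard validates at runtime but does not narrow `value`
        # for the type checker:
        if not all([value, host]):
            raise ValueError("Missing one or more required connection parameters.")
        # int() still sees str | None here; the code-scoped ignore silences
        # only the arg-type diagnostic.
        return int(value)  # type: ignore[arg-type]

    print(to_port("5432"))  # 5432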

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/schema_introspection_agent/utils/mssql_utils.py

Lines changed: 62 additions & 65 deletions

@@ -5,18 +5,20 @@
 from typing import Any

 import google.auth
+import pyodbc
 from google import genai
 from google.api_core import exceptions
 from google.genai import types

 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

+
 try:
     _, project_id = google.auth.default()
     GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT", project_id)
 except google.auth.exceptions.DefaultCredentialsError:
-    GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT")
+    GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT")  # type: ignore[assignment]

 if not GOOGLE_CLOUD_PROJECT:
     logger.warning(
@@ -27,7 +29,7 @@
 GOOGLE_GENAI_USE_VERTEXAI = os.environ.get(
     "GOOGLE_GENAI_USE_VERTEXAI", "True"
 ).lower() in ("true", "1")
-MODEL = os.environ.get("MODEL", "gemini-2.5-pro")
+MODEL = os.environ.get("MODEL", "gemini-1.5-pro")

 client = None
 if GOOGLE_CLOUD_PROJECT:
@@ -38,26 +40,30 @@
             location=GOOGLE_CLOUD_LOCATION,
         )
         logger.info(
-            f"GenAI Client initialized in postgres_utils. VertexAI: {GOOGLE_GENAI_USE_VERTEXAI}, Project: {GOOGLE_CLOUD_PROJECT}, Location: {GOOGLE_CLOUD_LOCATION}, Model: {MODEL}"
+            f"GenAI Client initialized in mssql_utils. VertexAI: {GOOGLE_GENAI_USE_VERTEXAI}, Project: {GOOGLE_CLOUD_PROJECT}, Location: {GOOGLE_CLOUD_LOCATION}, Model: {MODEL}"
         )
     except Exception as e:
-        logger.error(f"Failed to initialize GenAI Client in postgres_utils: {e}")
+        logger.error(f"Failed to initialize GenAI Client in mssql_utils: {e}")
 else:
     logger.error(
-        "Cannot initialize GenAI Client in postgres_utils: GOOGLE_CLOUD_PROJECT is not set."
+        "Cannot initialize GenAI Client in mssql_utils: GOOGLE_CLOUD_PROJECT is not set."
     )


 def _execute_query(conn: Any, query: str) -> list[dict[str, Any]]:
-    """Executes a SQL query and returns results as a list of dicts for PostgreSQL."""
+    """Executes a SQL query and returns results as a list of dicts for SQL Server."""
     cursor = conn.cursor()
     try:
         cursor.execute(query)
         if cursor.description:
-            columns = [desc[0] for desc in cursor.description]
+            columns = [column[0] for column in cursor.description]
             rows = cursor.fetchall()
             return [dict(zip(columns, row, strict=False)) for row in rows]
         return []
+    except pyodbc.Error as ex:
+        sqlstate = ex.args[0]
+        logger.error(f"SQL Error ({sqlstate}): {ex} for query: {query}")
+        raise
     finally:
         cursor.close()

@@ -163,10 +169,10 @@ def _analyze_with_llm(
     logger.debug(f"****** Custom_LLM_Request: {prompt}")
     response = client.models.generate_content(
         model=MODEL,
-        contents=[types.Part.from_text(text=prompt)],
+        contents=[types.Part.from_text(text=prompt)],  # type: ignore[arg-type]
         config=types.GenerateContentConfig(response_mime_type="application/json"),
     )
-    generated_text = response.candidates[0].content.parts[0].text
+    generated_text = response.candidates[0].content.parts[0].text  # type: ignore[index, union-attr, assignment]
     logger.debug(f"****** Raw LLM Response: {generated_text}")
     cleaned_json = _extract_json_content(generated_text)
     logger.debug(
@@ -202,99 +208,90 @@
     }


-def get_postgres_schema_details(conn: Any, schema_name: str) -> dict[str, Any]:
-    details = {
+def get_mssql_schema_details(conn: Any, schema_name: str) -> dict[str, Any]:
+    logger.info(f"Fetching MSSQL schema details for: {schema_name}")
+    details: dict[str, Any] = {
         "tables": {},
         "views": {},
         "foreign_keys": [],
         "inferred_relationships": [],
         "anomalies": [],
     }
-    logger.info(f"Fetching PostgreSQL schema details for: {schema_name}")

-    tables_query = f"""
-        SELECT table_name
-        FROM information_schema.tables
-        WHERE table_schema = '{schema_name}' AND table_type = 'BASE TABLE';
-    """
+    tables_query = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema_name}' AND TABLE_TYPE = 'BASE TABLE';"
     tables = _execute_query(conn, tables_query)
     for table in tables:
-        t_name = table["table_name"]
+        t_name = table["TABLE_NAME"]
         details["tables"][t_name] = {"columns": {}, "constraints": [], "indexes": []}
-        cols_query = f"""
-            SELECT column_name, data_type, character_maximum_length, numeric_precision, numeric_scale, is_nullable, column_default
-            FROM information_schema.columns WHERE table_schema = '{schema_name}' AND table_name = '{t_name}';
-        """
+        cols_query = f"SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE, IS_NULLABLE, COLUMN_DEFAULT FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{schema_name}' AND TABLE_NAME = '{t_name}';"
         for col in _execute_query(conn, cols_query):
-            details["tables"][t_name]["columns"][col["column_name"]] = {
-                "type": col["data_type"],
-                "length": col["character_maximum_length"],
-                "precision": col["numeric_precision"],
-                "scale": col["numeric_scale"],
-                "nullable": col["is_nullable"] == "YES",
-                "default": col["column_default"],
+            details["tables"][t_name]["columns"][col["COLUMN_NAME"]] = {
+                "type": col["DATA_TYPE"],
+                "length": col["CHARACTER_MAXIMUM_LENGTH"],
+                "precision": col["NUMERIC_PRECISION"],
+                "scale": col["NUMERIC_SCALE"],
+                "nullable": col["IS_NULLABLE"] == "YES",
+                "default": col["COLUMN_DEFAULT"],
             }
+
         constraints_query = f"""
-            SELECT tc.table_name, tc.constraint_name, tc.constraint_type, kcu.column_name, cc.check_clause
-            FROM information_schema.table_constraints tc
-            LEFT JOIN information_schema.key_column_usage kcu ON tc.constraint_name = kcu.constraint_name AND tc.table_schema = kcu.table_schema AND tc.table_name = kcu.table_name
-            LEFT JOIN information_schema.check_constraints cc ON tc.constraint_name = cc.constraint_name AND tc.table_schema = cc.constraint_schema
-            WHERE tc.table_schema = '{schema_name}' AND tc.table_name = '{t_name}';
+            SELECT KCU.TABLE_NAME, TC.CONSTRAINT_NAME, TC.CONSTRAINT_TYPE, KCU.COLUMN_NAME, CC.CHECK_CLAUSE
+            FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS TC
+            LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU ON TC.CONSTRAINT_NAME = KCU.CONSTRAINT_NAME AND TC.TABLE_SCHEMA = KCU.TABLE_SCHEMA AND TC.TABLE_NAME = KCU.TABLE_NAME
+            LEFT JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS AS CC ON TC.CONSTRAINT_NAME = CC.CONSTRAINT_NAME AND TC.CONSTRAINT_SCHEMA = CC.CONSTRAINT_SCHEMA
+            WHERE TC.TABLE_SCHEMA = '{schema_name}' AND KCU.TABLE_NAME = '{t_name}';
         """
         details["tables"][t_name]["constraints"] = _execute_query(
             conn, constraints_query
         )
+
         indexes_query = f"""
-            SELECT
-                t.relname AS table_name, i.relname AS index_name, a.attname AS column_name, ix.indisunique AS is_unique
-            FROM pg_class t JOIN pg_index ix ON t.oid = ix.indrelid JOIN pg_class i ON i.oid = ix.indexrelid
-            LEFT JOIN pg_attribute a ON a.attrelid = t.oid AND a.attnum = ANY(ix.indkey)
-            JOIN pg_namespace n ON t.relnamespace = n.oid WHERE n.nspname = '{schema_name}' AND t.relname = '{t_name}' AND t.relkind = 'r';
+            SELECT t.name AS table_name, ind.name AS index_name, COL_NAME(ic.object_id, ic.column_id) AS column_name, ind.is_unique
+            FROM sys.indexes ind INNER JOIN sys.index_columns ic ON ind.object_id = ic.object_id AND ind.index_id = ic.index_id
+            INNER JOIN sys.tables t ON ind.object_id = t.object_id INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
+            WHERE s.name = '{schema_name}' AND t.name = '{t_name}' AND ind.is_hypothetical = 0 AND ind.type > 0;
         """
         try:
            indexes = _execute_query(conn, indexes_query)
            grouped_indexes = {}
            for index in indexes:
-                if index["column_name"]:
-                    idx_name = index["index_name"]
-                    if idx_name not in grouped_indexes:
-                        grouped_indexes[idx_name] = {
-                            "name": idx_name,
-                            "columns": [],
-                            "unique": index["is_unique"],
-                        }
-                    if index["column_name"] not in grouped_indexes[idx_name]["columns"]:
-                        grouped_indexes[idx_name]["columns"].append(
-                            index["column_name"]
-                        )
+                idx_name = index["index_name"]
+                if not idx_name:
+                    continue
+                if idx_name not in grouped_indexes:
+                    grouped_indexes[idx_name] = {
+                        "name": idx_name,
+                        "columns": [],
+                        "unique": index["is_unique"],
+                    }
+                if index["column_name"] not in grouped_indexes[idx_name]["columns"]:
+                    grouped_indexes[idx_name]["columns"].append(index["column_name"])
            details["tables"][t_name]["indexes"] = list(grouped_indexes.values())
        except Exception as e:
-            logger.error(f"Error fetching PostgreSQL indexes for {t_name}: {e}")
+            logger.error(f"Error fetching MSSQL indexes for {t_name}: {e}")

     fks_query = f"""
-        SELECT
-            tc.constraint_name, tc.table_name AS from_table, kcu.column_name AS from_column,
-            ccu.table_schema AS to_schema, ccu.table_name AS to_table, ccu.column_name AS to_column
-        FROM information_schema.table_constraints AS tc JOIN information_schema.key_column_usage AS kcu
-        ON tc.constraint_name = kcu.constraint_name AND tc.table_schema = kcu.table_schema
-        JOIN information_schema.constraint_column_usage AS ccu
-        ON ccu.constraint_name = tc.constraint_name AND ccu.table_schema = tc.table_schema
-        WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_schema = '{schema_name}';
+        SELECT KCU1.CONSTRAINT_NAME AS constraint_name, KCU1.TABLE_NAME AS from_table, KCU1.COLUMN_NAME AS from_column,
+            KCU2.TABLE_SCHEMA AS to_schema, KCU2.TABLE_NAME AS to_table, KCU2.COLUMN_NAME AS to_column
+        FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
+        JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 ON KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
+        JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 ON KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
+        WHERE KCU1.TABLE_SCHEMA = '{schema_name}';
     """
     details["foreign_keys"] = _execute_query(conn, fks_query)
-    views_query = f"SELECT table_name AS view_name, view_definition FROM information_schema.views WHERE table_schema = '{schema_name}';"
+    views_query = f"SELECT TABLE_NAME AS view_name, VIEW_DEFINITION FROM INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema_name}';"
     details["views"] = {
-        view["view_name"]: {"definition": view["view_definition"]}
+        view["view_name"]: {"definition": view["VIEW_DEFINITION"]}
         for view in _execute_query(conn, views_query)
     }

-    llm_analysis = _analyze_with_llm(schema_name, "PostgreSQL", details)
+    llm_analysis = _analyze_with_llm(schema_name, "Microsoft SQL Server", details)
     details["inferred_relationships"] = llm_analysis.get("inferred_relationships", [])
     details["anomalies"] = llm_analysis.get("anomalies", [])
     logger.info(
-        f"Found {len(details['inferred_relationships'])} potential inferred relationships for PostgreSQL."
+        f"Found {len(details['inferred_relationships'])} potential inferred relationships for MSSQL."
     )
     logger.info(
-        f"Found {len(details['anomalies'])} potential relationship anomalies for PostgreSQL."
+        f"Found {len(details['anomalies'])} potential relationship anomalies for MSSQL."
     )
     return details
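The _execute_query helper above converts DB-API rows to dicts by zipping the column names from cursor.description with each row tuple; strict=False tolerates a length mismatch instead of raising. A standalone sketch of that mapping, with made-up description and rows standing in for a live pyodbc cursor:

    from typing import Any

    # Stand-ins for a live cursor: DB-API puts the column name first in each
    # cursor.description tuple.
    description = [("TABLE_NAME", str), ("TABLE_TYPE", str)]
    rows = [("orders", "BASE TABLE"), ("customers", "BASE TABLE")]

    columns = [column[0] for column in description]
    records: list[dict[str, Any]] = [
        dict(zip(columns, row, strict=False)) for row in rows
    ]
    print(records)
    # [{'TABLE_NAME': 'orders', 'TABLE_TYPE': 'BASE TABLE'},
    #  {'TABLE_NAME': 'customers', 'TABLE_TYPE': 'BASE TABLE'}]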

agent-app/app/sub_agents/data_model_discovery_agent/sub_agents/schema_introspection_agent/utils/mysql_utils.py

Lines changed: 4 additions & 4 deletions

@@ -17,7 +17,7 @@
     _, project_id = google.auth.default()
     GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT", project_id)
 except google.auth.exceptions.DefaultCredentialsError:
-    GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT")
+    GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT")  # type: ignore[assignment]

 if not GOOGLE_CLOUD_PROJECT:
     logger.warning(
@@ -175,9 +175,9 @@ def _analyze_with_llm(
     logger.debug(f"****** Custom_LLM_Request: {prompt}")
     response = client.models.generate_content(
         model=MODEL,
-        contents=[types.Part.from_text(text=prompt)],
+        contents=[types.Part.from_text(text=prompt)],  # type: ignore[arg-type]
     )
-    generated_text = response.candidates[0].content.parts[0].text
+    generated_text = response.candidates[0].content.parts[0].text  # type: ignore[index, union-attr, assignment]
     logger.debug(f"****** Raw LLM Response: {generated_text}")

     # handles ```json blocks
@@ -229,7 +229,7 @@ def get_mysql_schema_details(conn: Any, schema_name: str) -> dict[str, Any]:
         logger.error(f"MySQL change database failed: {err}")
         raise

-    details = {
+    details: dict[str, Any] = {
         "tables": {},
         "views": {},
         "foreign_keys": [],
