|
5 | 5 | from typing import Any |
6 | 6 |
|
7 | 7 | import google.auth |
| 8 | +import pyodbc |
8 | 9 | from google import genai |
9 | 10 | from google.api_core import exceptions |
10 | 11 | from google.genai import types |
11 | 12 |
|
12 | 13 | logger = logging.getLogger(__name__) |
13 | 14 | logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") |
14 | 15 |
|
| 16 | + |
15 | 17 | try: |
16 | 18 | _, project_id = google.auth.default() |
17 | 19 | GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT", project_id) |
18 | 20 | except google.auth.exceptions.DefaultCredentialsError: |
19 | | - GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT") |
| 21 | + GOOGLE_CLOUD_PROJECT = os.environ.get("GOOGLE_CLOUD_PROJECT") # type: ignore[assignment] |
20 | 22 |
|
21 | 23 | if not GOOGLE_CLOUD_PROJECT: |
22 | 24 | logger.warning( |
|
# Route GenAI calls through Vertex AI unless explicitly disabled via the
# environment ("true"/"1" are treated as enabled, case-insensitively).
GOOGLE_GENAI_USE_VERTEXAI = os.environ.get("GOOGLE_GENAI_USE_VERTEXAI", "True").lower() in (
    "true",
    "1",
)
# Model used for schema analysis.
# NOTE(review): this change lowered the default from "gemini-2.5-pro" to
# "gemini-1.5-pro" — confirm the downgrade is intentional.
MODEL = os.environ.get("MODEL", "gemini-1.5-pro")
31 | 33 |
|
32 | 34 | client = None |
33 | 35 | if GOOGLE_CLOUD_PROJECT: |
|
38 | 40 | location=GOOGLE_CLOUD_LOCATION, |
39 | 41 | ) |
40 | 42 | logger.info( |
41 | | - f"GenAI Client initialized in postgres_utils. VertexAI: {GOOGLE_GENAI_USE_VERTEXAI}, Project: {GOOGLE_CLOUD_PROJECT}, Location: {GOOGLE_CLOUD_LOCATION}, Model: {MODEL}" |
| 43 | + f"GenAI Client initialized in mssql_utils. VertexAI: {GOOGLE_GENAI_USE_VERTEXAI}, Project: {GOOGLE_CLOUD_PROJECT}, Location: {GOOGLE_CLOUD_LOCATION}, Model: {MODEL}" |
42 | 44 | ) |
43 | 45 | except Exception as e: |
44 | | - logger.error(f"Failed to initialize GenAI Client in postgres_utils: {e}") |
| 46 | + logger.error(f"Failed to initialize GenAI Client in mssql_utils: {e}") |
45 | 47 | else: |
46 | 48 | logger.error( |
47 | | - "Cannot initialize GenAI Client in postgres_utils: GOOGLE_CLOUD_PROJECT is not set." |
| 49 | + "Cannot initialize GenAI Client in mssql_utils: GOOGLE_CLOUD_PROJECT is not set." |
48 | 50 | ) |
49 | 51 |
|
50 | 52 |
|
51 | 53 | def _execute_query(conn: Any, query: str) -> list[dict[str, Any]]: |
52 | | - """Executes a SQL query and returns results as a list of dicts for PostgreSQL.""" |
| 54 | + """Executes a SQL query and returns results as a list of dicts for SQL Server.""" |
53 | 55 | cursor = conn.cursor() |
54 | 56 | try: |
55 | 57 | cursor.execute(query) |
56 | 58 | if cursor.description: |
57 | | - columns = [desc[0] for desc in cursor.description] |
| 59 | + columns = [column[0] for column in cursor.description] |
58 | 60 | rows = cursor.fetchall() |
59 | 61 | return [dict(zip(columns, row, strict=False)) for row in rows] |
60 | 62 | return [] |
| 63 | + except pyodbc.Error as ex: |
| 64 | + sqlstate = ex.args[0] |
| 65 | + logger.error(f"SQL Error ({sqlstate}): {ex} for query: {query}") |
| 66 | + raise |
61 | 67 | finally: |
62 | 68 | cursor.close() |
63 | 69 |
|
@@ -163,10 +169,10 @@ def _analyze_with_llm( |
163 | 169 | logger.debug(f"****** Custom_LLM_Request: {prompt}") |
164 | 170 | response = client.models.generate_content( |
165 | 171 | model=MODEL, |
166 | | - contents=[types.Part.from_text(text=prompt)], |
| 172 | + contents=[types.Part.from_text(text=prompt)], # type: ignore[arg-type] |
167 | 173 | config=types.GenerateContentConfig(response_mime_type="application/json"), |
168 | 174 | ) |
169 | | - generated_text = response.candidates[0].content.parts[0].text |
| 175 | + generated_text = response.candidates[0].content.parts[0].text # type: ignore[index, union-attr, assignment] |
170 | 176 | logger.debug(f"****** Raw LLM Response: {generated_text}") |
171 | 177 | cleaned_json = _extract_json_content(generated_text) |
172 | 178 | logger.debug( |
@@ -202,99 +208,90 @@ def _analyze_with_llm( |
202 | 208 | } |
203 | 209 |
|
204 | 210 |
|
def get_mssql_schema_details(conn: Any, schema_name: str) -> dict[str, Any]:
    """Introspect a SQL Server schema and return its structural details.

    Collects tables (columns, constraints, indexes), views, and foreign
    keys from INFORMATION_SCHEMA and the sys catalog views, then asks the
    LLM (via ``_analyze_with_llm``) to infer undeclared relationships and
    flag anomalies.

    Args:
        conn: An open DB-API (pyodbc) connection to the SQL Server database.
        schema_name: Name of the schema to inspect (e.g. "dbo").

    Returns:
        A dict with keys "tables", "views", "foreign_keys",
        "inferred_relationships", and "anomalies".
    """
    logger.info(f"Fetching MSSQL schema details for: {schema_name}")
    details: dict[str, Any] = {
        "tables": {},
        "views": {},
        "foreign_keys": [],
        "inferred_relationships": [],
        "anomalies": [],
    }

    # Identifiers are interpolated into the SQL text below (_execute_query
    # takes a plain string, so these catalog queries cannot be
    # parameterized). Double any single quote so a quoted name cannot
    # break out of the string literal.
    safe_schema = schema_name.replace("'", "''")

    tables_query = f"SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{safe_schema}' AND TABLE_TYPE = 'BASE TABLE';"
    tables = _execute_query(conn, tables_query)
    for table in tables:
        t_name = table["TABLE_NAME"]
        safe_table = t_name.replace("'", "''")
        details["tables"][t_name] = {"columns": {}, "constraints": [], "indexes": []}

        cols_query = f"SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE, IS_NULLABLE, COLUMN_DEFAULT FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = '{safe_schema}' AND TABLE_NAME = '{safe_table}';"
        for col in _execute_query(conn, cols_query):
            details["tables"][t_name]["columns"][col["COLUMN_NAME"]] = {
                "type": col["DATA_TYPE"],
                "length": col["CHARACTER_MAXIMUM_LENGTH"],
                "precision": col["NUMERIC_PRECISION"],
                "scale": col["NUMERIC_SCALE"],
                "nullable": col["IS_NULLABLE"] == "YES",
                "default": col["COLUMN_DEFAULT"],
            }

        # Filter on TC.TABLE_NAME (not KCU.TABLE_NAME): CHECK constraints
        # have no KEY_COLUMN_USAGE rows, so the LEFT JOIN yields NULL for
        # KCU columns and a KCU-based filter would silently drop them.
        constraints_query = f"""
        SELECT TC.TABLE_NAME, TC.CONSTRAINT_NAME, TC.CONSTRAINT_TYPE, KCU.COLUMN_NAME, CC.CHECK_CLAUSE
        FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS TC
        LEFT JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS KCU ON TC.CONSTRAINT_NAME = KCU.CONSTRAINT_NAME AND TC.TABLE_SCHEMA = KCU.TABLE_SCHEMA AND TC.TABLE_NAME = KCU.TABLE_NAME
        LEFT JOIN INFORMATION_SCHEMA.CHECK_CONSTRAINTS AS CC ON TC.CONSTRAINT_NAME = CC.CONSTRAINT_NAME AND TC.CONSTRAINT_SCHEMA = CC.CONSTRAINT_SCHEMA
        WHERE TC.TABLE_SCHEMA = '{safe_schema}' AND TC.TABLE_NAME = '{safe_table}';
        """
        details["tables"][t_name]["constraints"] = _execute_query(
            conn, constraints_query
        )

        # type > 0 excludes heaps; is_hypothetical = 0 excludes indexes
        # created only for the tuning advisor.
        indexes_query = f"""
        SELECT t.name AS table_name, ind.name AS index_name, COL_NAME(ic.object_id, ic.column_id) AS column_name, ind.is_unique
        FROM sys.indexes ind INNER JOIN sys.index_columns ic ON ind.object_id = ic.object_id AND ind.index_id = ic.index_id
        INNER JOIN sys.tables t ON ind.object_id = t.object_id INNER JOIN sys.schemas s ON t.schema_id = s.schema_id
        WHERE s.name = '{safe_schema}' AND t.name = '{safe_table}' AND ind.is_hypothetical = 0 AND ind.type > 0;
        """
        try:
            # One row per (index, column); group into one entry per index.
            grouped_indexes: dict[str, dict[str, Any]] = {}
            for index in _execute_query(conn, indexes_query):
                idx_name = index["index_name"]
                if not idx_name:
                    continue
                entry = grouped_indexes.setdefault(
                    idx_name,
                    {"name": idx_name, "columns": [], "unique": index["is_unique"]},
                )
                if index["column_name"] not in entry["columns"]:
                    entry["columns"].append(index["column_name"])
            details["tables"][t_name]["indexes"] = list(grouped_indexes.values())
        except Exception as e:
            logger.error(f"Error fetching MSSQL indexes for {t_name}: {e}")

    # Match referencing and referenced columns by ordinal position so
    # composite foreign keys pair up column-for-column.
    fks_query = f"""
    SELECT KCU1.CONSTRAINT_NAME AS constraint_name, KCU1.TABLE_NAME AS from_table, KCU1.COLUMN_NAME AS from_column,
        KCU2.TABLE_SCHEMA AS to_schema, KCU2.TABLE_NAME AS to_table, KCU2.COLUMN_NAME AS to_column
    FROM INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS RC
    JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU1 ON KCU1.CONSTRAINT_SCHEMA = RC.CONSTRAINT_SCHEMA AND KCU1.CONSTRAINT_NAME = RC.CONSTRAINT_NAME
    JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE KCU2 ON KCU2.CONSTRAINT_SCHEMA = RC.UNIQUE_CONSTRAINT_SCHEMA AND KCU2.CONSTRAINT_NAME = RC.UNIQUE_CONSTRAINT_NAME AND KCU2.ORDINAL_POSITION = KCU1.ORDINAL_POSITION
    WHERE KCU1.TABLE_SCHEMA = '{safe_schema}';
    """
    details["foreign_keys"] = _execute_query(conn, fks_query)

    views_query = f"SELECT TABLE_NAME AS view_name, VIEW_DEFINITION FROM INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{safe_schema}';"
    details["views"] = {
        view["view_name"]: {"definition": view["VIEW_DEFINITION"]}
        for view in _execute_query(conn, views_query)
    }

    llm_analysis = _analyze_with_llm(schema_name, "Microsoft SQL Server", details)
    details["inferred_relationships"] = llm_analysis.get("inferred_relationships", [])
    details["anomalies"] = llm_analysis.get("anomalies", [])
    logger.info(
        f"Found {len(details['inferred_relationships'])} potential inferred relationships for MSSQL."
    )
    logger.info(
        f"Found {len(details['anomalies'])} potential relationship anomalies for MSSQL."
    )
    return details
0 commit comments