2020
2121import argparse
2222import csv
23- from curses import raw
2423import json
2524import sys
2625import time
2726import yaml
2827import pickle
28+ from decimal import Decimal
2929from pathlib import Path
3030from typing import List , Dict , Any , Optional
3131from datetime import datetime
3232from uuid import UUID
33- from decimal import Decimal
3433
3534try :
3635 from cassandra .cluster import Cluster , ExecutionProfile , EXEC_PROFILE_DEFAULT
@@ -271,10 +270,9 @@ def generate_embedding_for_text(self, text: str, target_dimensions: int) -> Opti
271270 embedding = self .embedder .generate_embedding (text )
272271
273272 # Reduce to target dimensions
274- # reduced = self.embedder.reduce_dimensions(embedding, target_dimensions)
273+ reduced = self .embedder .reduce_dimensions (embedding , target_dimensions )
275274
276- #return reduced.flatten().tolist()
277- return embedding
275+ return reduced .flatten ().tolist ()
278276
279277 except Exception as e :
280278 log_warning (f"Failed to generate embedding: { e } " )
@@ -389,8 +387,8 @@ def load_table_with_embeddings(self, table_name: str, csv_path: Path) -> tuple[i
389387 rows_failed = 0
390388 embeddings_generated = 0
391389
392- # try:
393- with open (csv_path , 'r' , encoding = 'utf-8' ) as f :
390+ try :
391+ with open (csv_path , 'r' , encoding = 'utf-8' ) as f :
394392 reader = csv .DictReader (f )
395393 columns = list (reader .fieldnames )
396394
@@ -414,7 +412,7 @@ def load_table_with_embeddings(self, table_name: str, csv_path: Path) -> tuple[i
414412 batch = []
415413
416414 for row in reader :
417- # try:
415+ try :
418416 parsed_values = []
419417 text_for_embedding = None
420418
@@ -441,12 +439,10 @@ def load_table_with_embeddings(self, table_name: str, csv_path: Path) -> tuple[i
441439
442440 # Generate embedding
443441 if text_for_embedding :
444- #print("Got here")
445442 embedding = self .generate_embedding_for_text (
446443 text_for_embedding ,
447444 vector_mappings [table_name ]['dimensions' ]
448445 )
449- #print("Got here2")
450446 parsed_values .append (embedding )
451447 if embedding is not None :
452448 embeddings_generated += 1
@@ -463,11 +459,9 @@ def load_table_with_embeddings(self, table_name: str, csv_path: Path) -> tuple[i
463459 # numeric
464460 value = row .get (col , '' )
465461 if '.' in value :
466- #float
467462 raw_value = float (Decimal (value ))
468463 parsed_values .append (raw_value )
469464 else :
470- #integer
471465 raw_value = int (value )
472466 parsed_values .append (raw_value )
473467 elif row .get (col , '' ).startswith ("[" ) and row .get (col , '' ).endswith ("]" ):
@@ -502,9 +496,9 @@ def load_table_with_embeddings(self, table_name: str, csv_path: Path) -> tuple[i
502496 status += f" ({ embeddings_generated } embeddings)"
503497 print (status + "..." , end = '\r ' )
504498
505- # except Exception as e:
506- # log_warning(f"Failed to process row: {e}")
507- # rows_failed += 1
499+ except Exception as e :
500+ log_warning (f"Failed to process row: { e } " )
501+ rows_failed += 1
508502
509503 # Execute remaining batch
510504 if batch :
@@ -522,9 +516,9 @@ def load_table_with_embeddings(self, table_name: str, csv_path: Path) -> tuple[i
522516
523517 return rows_loaded , rows_failed
524518
525- # except Exception as e:
526- # log_error(f"Failed to load {table_name}: {e}")
527- # return rows_loaded, rows_failed
519+ except Exception as e :
520+ log_error (f"Failed to load { table_name } : { e } " )
521+ return rows_loaded , rows_failed
528522
529523 def load_counter_table (self , table_name : str , csv_path : Path , update_query : str ) -> tuple [int , int ]:
530524 """Load counter table (same as before)"""
0 commit comments