business-science
diff --git a/‎temp/customer_churn_ai_ml/customer_churn_ai_ml.py ‎002_customer_churn_ai_ml/customer_churn_ai_ml.py
+15-9 b/‎temp/customer_churn_ai_ml/customer_churn_ai_ml.py ‎002_customer_churn_ai_ml/customer_churn_ai_ml.py
+15-9
diff --git a/‎temp/customer_churn_ai_ml/data/customer_churn.csv ‎002_customer_churn_ai_ml/data/customer_churn.csv b/‎temp/customer_churn_ai_ml/data/customer_churn.csv ‎002_customer_churn_ai_ml/data/customer_churn.csv
diff --git a/‎temp/customer_churn_ai_ml/data/customer_churn_summary.csv ‎002_customer_churn_ai_ml/data/customer_churn_summary.csv b/‎temp/customer_churn_ai_ml/data/customer_churn_summary.csv ‎002_customer_churn_ai_ml/data/customer_churn_summary.csv
diff --git a/‎temp/customer_churn_ai_ml/data/customer_churn_summary_embeddings.csv ‎002_customer_churn_ai_ml/data/customer_churn_summary_embeddings.csv b/‎temp/customer_churn_ai_ml/data/customer_churn_summary_embeddings.csv ‎002_customer_churn_ai_ml/data/customer_churn_summary_embeddings.csv
diff --git a/‎002_csv_semantic_search/002_CSV_Semantic_Search.ipynb ‎temp/002_csv_semantic_search/002_CSV_Semantic_Search.ipynb b/‎002_csv_semantic_search/002_CSV_Semantic_Search.ipynb ‎temp/002_csv_semantic_search/002_CSV_Semantic_Search.ipynb
diff --git a/‎002_csv_semantic_search/002_csv_semantic_search.py ‎temp/002_csv_semantic_search/002_csv_semantic_search.py b/‎002_csv_semantic_search/002_csv_semantic_search.py ‎temp/002_csv_semantic_search/002_csv_semantic_search.py
diff --git a/‎002_csv_semantic_search/data/business_data.csv ‎temp/002_csv_semantic_search/data/business_data.csv b/‎002_csv_semantic_search/data/business_data.csv ‎temp/002_csv_semantic_search/data/business_data.csv
diff --git a/‎002_csv_semantic_search/data/create_data.py ‎temp/002_csv_semantic_search/data/create_data.py b/‎002_csv_semantic_search/data/create_data.py ‎temp/002_csv_semantic_search/data/create_data.py
@@ -1,5 +1,10 @@
+# BUSINESS SCIENCE GENERATIVE AI/ML TIPS ----
+# AI-TIP 002 | AI/ML FOR CUSTOMER CHURN ----
 
-
+# GOALS: 
+# - Use LLMs to generate summaries of customer tickets
+# - Use Text Embeddings to convert text to vectors
+# - Use XGBoost to predict customer churn with AI features
 
 # Libraries
 
@@ -18,12 +23,13 @@
 import ast
 import matplotlib.pyplot as plt
 
-
-
 # ---------------------------
 # 1. Setup
 # ---------------------------
 
+# PATHS (no trailing slash: every use appends "/data/<file>.csv")
+PATH_ROOT = "002_customer_churn_ai_ml"
+
 # MODELS
 LLM_MODEL = "gpt-4o-mini"
 EMBEDDING_MODEL = "text-embedding-ada-002"
@@ -35,7 +41,7 @@
 client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])
 
 # DATASET 
-df = pd.read_csv("temp/customer_churn_ai_ml/data/customer_churn.csv")
+df = pd.read_csv(PATH_ROOT + "/data/customer_churn.csv")
 
 # ---------------------------
 # 2. Generate Summaries with an LLM
@@ -46,7 +52,7 @@
 def summarize_ticket(ticket_text):
     prompt = f"Summarize the following customer ticket focusing on the main complaint or request:\n\n{ticket_text}\n\nSummary:"
     response = client.chat.completions.create(
-        model="gpt-4o-mini",
+        model=LLM_MODEL,
         messages=[
             {"role": "system", "content": "You are a helpful assistant."},
             {"role": "user", "content": prompt}
@@ -58,9 +64,9 @@ def summarize_ticket(ticket_text):
 
 df['ticket_summary'] = df['ticket_notes'].apply(summarize_ticket)
 
-# df.to_csv("temp/customer_churn_ai_ml/data/customer_churn_summary.csv", index=False)
+# df.to_csv(PATH_ROOT + "/data/customer_churn_summary.csv", index=False)
 
-df = pd.read_csv("temp/customer_churn_ai_ml/data/customer_churn_summary.csv")
+df = pd.read_csv(PATH_ROOT + "/data/customer_churn_summary.csv")
 df
 
 # ---------------------------
@@ -80,9 +86,9 @@ def get_embeddings(text):
 
 df['summary_embedding'] = df['ticket_summary'].apply(get_embeddings)
 
-# df.to_csv("temp/customer_churn_ai_ml/data/customer_churn_summary_embeddings.csv", index=False)
+# df.to_csv(PATH_ROOT + "/data/customer_churn_summary_embeddings.csv", index=False)
 
-df = pd.read_csv("temp/customer_churn_ai_ml/data/customer_churn_summary_embeddings.csv")
+df = pd.read_csv(PATH_ROOT + "/data/customer_churn_summary_embeddings.csv")
 
 df