1
+ # BUSINESS SCIENCE GENERATIVE AI/ML TIPS ----
2
+ # AI-TIP 002 | AI/ML FOR CUSTOMER CHURN ----
1
3
2
-
4
+ # GOALS:
5
+ # - Use LLMs to generate summaries of customer tickets
6
+ # - Use Text Embeddings to convert text to vectors
7
+ # - Use XGBoost to predict customer churn with AI features
3
8
4
9
# Libraries
5
10
18
23
import ast
19
24
import matplotlib .pyplot as plt
20
25
21
-
22
-
23
26
# ---------------------------
24
27
# 1. Setup
25
28
# ---------------------------
26
29
30
+ # PATHS
31
+ PATH_ROOT = "002_customer_churn_ai_ml/"
32
+
27
33
# MODELS
28
34
LLM_MODEL = "gpt-4o-mini"
29
35
EMBEDDING_MODEL = "text-embedding-ada-002"
35
41
client = OpenAI (api_key = os .environ ['OPENAI_API_KEY' ])
36
42
37
43
# DATASET
38
- df = pd .read_csv ("temp/customer_churn_ai_ml /data/customer_churn.csv" )
44
+ # Join path components instead of concatenating strings: PATH_ROOT already
+ # ends with "/", so "+" would yield a doubled/malformed separator.
+ df = pd.read_csv(os.path.join(PATH_ROOT, "data", "customer_churn.csv"))
39
45
40
46
# ---------------------------
41
47
# 2. Generate Summaries with an LLM
46
52
def summarize_ticket (ticket_text ):
47
53
prompt = f"Summarize the following customer ticket focusing on the main complaint or request:\n \n { ticket_text } \n \n Summary:"
48
54
response = client .chat .completions .create (
49
- model = "gpt-4o-mini" ,
55
+ model = LLM_MODEL ,
50
56
messages = [
51
57
{"role" : "system" , "content" : "You are a helpful assistant." },
52
58
{"role" : "user" , "content" : prompt }
@@ -58,9 +64,9 @@ def summarize_ticket(ticket_text):
58
64
59
65
df ['ticket_summary' ] = df ['ticket_notes' ].apply (summarize_ticket )
60
66
61
- # df.to_csv("temp/customer_churn_ai_ml /data/customer_churn_summary.csv", index=False)
67
+ # df.to_csv(os.path.join(PATH_ROOT, "data", "customer_churn_summary.csv"), index=False)
62
68
63
- df = pd .read_csv ("temp/customer_churn_ai_ml /data/customer_churn_summary.csv" )
69
+ # Read the ticket-summary CSV. The path is built with os.path.join because
+ # PATH_ROOT already ends with "/", and plain "+" would malform the separator.
+ df = pd.read_csv(os.path.join(PATH_ROOT, "data", "customer_churn_summary.csv"))
64
70
df
65
71
66
72
# ---------------------------
@@ -80,9 +86,9 @@ def get_embeddings(text):
80
86
81
87
df ['summary_embedding' ] = df ['ticket_summary' ].apply (get_embeddings )
82
88
83
- # df.to_csv("temp/customer_churn_ai_ml /data/customer_churn_summary_embeddings.csv", index=False)
89
+ # df.to_csv(os.path.join(PATH_ROOT, "data", "customer_churn_summary_embeddings.csv"), index=False)
84
90
85
- df = pd .read_csv ("temp/customer_churn_ai_ml /data/customer_churn_summary_embeddings.csv" )
91
+ # Read the summary-embeddings CSV. The path is built with os.path.join
+ # because PATH_ROOT already ends with "/", and "+" would malform the separator.
+ df = pd.read_csv(os.path.join(PATH_ROOT, "data", "customer_churn_summary_embeddings.csv"))
86
92
87
93
df
88
94
0 commit comments