
Commit 8958373

working pipeline tested for our chosen models
1 parent 0e73435 commit 8958373

File tree

7 files changed: +2273 -49 lines


Expt_Karthik_1/input.csv: +201 lines (large diff not rendered by default)

Expt_Karthik_1/predictions_0.csv: +601 lines (large diff not rendered by default)

Expt_Karthik_1/predictions_1.csv: +629 lines (large diff not rendered by default)

Expt_Karthik_1/settings.json: +4 lines

@@ -0,0 +1,4 @@
+This is running on gemma and llama judges with qwen and llama as query models
+{"user":"Karthik","expt_id":"1"}
+{"query_models":[("text-generation", "Qwen/QwQ-32B", "instructional_agent", "instructional_agent"), ("text-generation", "meta-llama/Llama-3.1-8B-Instruct", "instructional_agent", "instructional_agent")]}
+{"judges":[("text-generation", "meta-llama/Llama-3.1-8B-Instruct", "instructional_agent", "instructional_agent"), ("text-generation", "google/gemma-2-2b-it", "instructional_agent", "instructional_agent")]}

app.py: +59 -28

@@ -3,63 +3,94 @@
 from transformers import pipeline
 from QueryFormatters import get_query_formatter
 from QueryModels import query_model
-
+import os
 
 
 ## explanation of the format given for each model,
 ## transformer_model_type,model_name,query_format_type,output_format_type
 
-input_file = "test.csv"
+input_file = "test.tsv"
 output_file = "test_output.csv"
 query_models = [
-    ("text-generation","google/gemma-2-2b-it","instructional_agent","instructional_agent"),]
+    ("text-generation","Qwen/QwQ-32B","instructional_agent","instructional_agent"),
+    ("text-generation","meta-llama/Llama-3.1-8B-Instruct","instructional_agent","instructional_agent"),
+]
 
 judges = [
     ("text-generation","meta-llama/Llama-3.1-8B-Instruct","instructional_agent","instructional_agent"),
     ("text-generation","google/gemma-2-2b-it","instructional_agent","instructional_agent"),]
 enable_judges = True
 enable_prediction = True
-
+description = "This is running on gemma and llama judges with qwen and llama as query models"
+user = "Karthik"
+expt_id = "1"
 
 def string_builder(template, **kwargs):
     return template.format(**kwargs)
+df = pd.read_csv(input_file, sep="\t")
+
+folder = "Expt_" + user + "_" + expt_id
+
+files_to_judge = []
+
+# Guard against overwriting a previous run. The check must target the folder
+# path; testing os.path.exists(expt_id), i.e. the bare string "1", never matches.
+if os.path.exists(folder):
+    print("Experiment folder already exists")
+    exit()
+
 
-df = pd.read_csv(input_file, usecols=["Prompt ID","Prompt","Error","Error Type","Error Salience","Model Response"])
-# generate responses
-enable_prediction = False
+
+# Debug aid: print the configured query models with their indices.
+for x in enumerate(query_models):
+    print(x)
+
+
+os.mkdir(folder)
+df.to_csv(folder + "/input.csv", index=False)
+# Record the experiment settings next to its outputs.
+with open(folder + "/settings.json", "w") as f:
+    f.write(f"{description}\n")
+    f.write('{"user":"' + user + '","expt_id":"' + expt_id + '"}' + '\n')
+    f.write('{"query_models":' + str(query_models) + '}' + '\n')
+    f.write('{"judges":' + str(judges) + '}' + '\n')
+
+
+enable_prediction = True
 if(enable_prediction):
+    idx = 0
     for transformer_model_type,model_name,query_format_type,output_format_type in query_models:
         query_formatter = get_query_formatter(query_type=query_format_type)
         model = query_model(model_name=model_name,transformer_model=transformer_model_type)
         responses = []
         queries = [query_formatter(prompt) for prompt in df["Prompt"]]
         responses = model.predict_withformatting(queries,"instructional_agent")
         df["Model Response"] = responses
-        df.to_csv(model_name+output_file, index=False)
-        print("Predictions saved to ",output_file)
+        # One predictions file per query model, numbered by its position in query_models.
+        file_name = folder + "/predictions_" + str(idx) + ".csv"
+        files_to_judge.append(file_name)
+        df.to_csv(file_name, index=False)
+        idx += 1
+        print("Predictions saved to", file_name)
 
 enable_judges = True
-
 if (enable_judges):
-    for judge_name,query_format in judges:
-        query_formatter = get_query_formatter("instructional_agent")
-        model = query_model(model_name=judge_name,transformer_model="text-generation")
-        # Type 1: Error acknowledgement
-        prompt_template = "Find out if this model response {response} has acknowledged the following error {error} of the query {query}. If yes, say True else False ,make sure to print nothing else ,just one word"
-        errors = df["Error"]
-        responses = df["Model Response"]
-        type1_prompts = [ string_builder(prompt_template, query=prompt, error=error,response = response) for prompt,error,response in zip(df["Prompt"],df["Error"],df["Model Response"])]
-        type1_queries = [query_formatter(prompt) for prompt in type1_prompts]
-        type1_responses = model.predict_withformatting(type1_queries,"instructional_agent")
-        df[judge_name+"_predictions_error_acknowledgement"] = type1_responses
-        # Type 2: Error correction
-        prompt_template = "Find out if this model response {response} has corrected the following error {error} of the query {query}. If yes, say True else False ,make sure to print nothing else ,just one word"
-        type2_prompts = [ string_builder(prompt_template, query=prompt, error=error,response = response) for prompt,error,response in zip(df["Prompt"],df["Error"],df["Model Response"])]
-        type2_queries = [query_formatter(prompt) for prompt in type2_prompts]
-        type2_responses = model.predict_withformatting(type2_queries,"instructional_agent")
-        df[judge_name+"_predictions_error_correction"] = type2_responses
-        df.to_csv(output_file, index=False)
-        print("Evaluations saved to ",output_file)
+    # Judge every predictions file with every judge model.
+    for file_to_judge in files_to_judge:
+        df = pd.read_csv(file_to_judge)
+        for transformer_model_type,model_name,query_format_type,output_format_type in judges:
+            query_formatter = get_query_formatter("instructional_agent")
+            model = query_model(model_name=model_name,transformer_model="text-generation")
+            # Type 1: Error acknowledgement
+            prompt_template = "Find out if this model response {response} has acknowledged the following error {error} of the query {query}. If yes, say True else False ,make sure to print nothing else ,just one word"
+            errors = df["Error"].tolist()
+            responses = df["Model Response"].tolist()
+            type1_prompts = [ string_builder(prompt_template, query=prompt, error=error,response = response) for prompt,error,response in zip(df["Prompt"],df["Error"],df["Model Response"])]
+            type1_queries = [query_formatter(prompt) for prompt in type1_prompts]
+            type1_responses = model.predict_withformatting(type1_queries,"instructional_agent")
+            df[model_name+"_predictions_error_acknowledgement"] = type1_responses
+            # Type 2: Error correction
+            prompt_template = "Find out if this model response {response} has corrected the following error {error} of the query {query}. If yes, say True else False ,make sure to print nothing else ,just one word"
+            type2_prompts = [ string_builder(prompt_template, query=prompt, error=error,response = response) for prompt,error,response in zip(df["Prompt"],df["Error"],df["Model Response"])]
+            type2_queries = [query_formatter(prompt) for prompt in type2_prompts]
+            type2_responses = model.predict_withformatting(type2_queries,"instructional_agent")
+            df[model_name+"_predictions_error_correction"] = type2_responses
+        # Judge verdicts are appended as new columns to the same predictions file.
+        df.to_csv(file_to_judge, index=False)
+    print("Evaluations saved to", files_to_judge)
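The script depends on the repo's own QueryModels.query_model and QueryFormatters.get_query_formatter, which this commit does not touch. Purely as an assumption inferred from the call sites in app.py, a wrapper with the same surface could look like the sketch below; the real modules may differ in every detail.

    from transformers import pipeline

    def get_query_formatter(query_type):
        # Assumed behaviour: wrap a raw prompt in an instruction-following frame.
        if query_type == "instructional_agent":
            return lambda prompt: "Follow the instruction and answer.\n\n" + str(prompt)
        raise ValueError("unknown query_type: " + query_type)

    class query_model:
        # Assumed shape of QueryModels.query_model, inferred from app.py's call sites.
        def __init__(self, model_name, transformer_model):
            # transformer_model is a transformers pipeline task, e.g. "text-generation".
            self.pipe = pipeline(transformer_model, model=model_name)

        def predict_withformatting(self, queries, output_format_type):
            # output_format_type would select a post-processing style; ignored here.
            outputs = self.pipe(queries, max_new_tokens=256, return_full_text=False)
            return [out[0]["generated_text"].strip() for out in outputs]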