 from transformers import pipeline
 from QueryFormatters import get_query_formatter
 from QueryModels import query_model
-
+import os
+import json
 
 
 ## explanation of the format given for each model:
 ## transformer_model_type, model_name, query_format_type, output_format_type
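+## e.g. ("text-generation","google/gemma-2-2b-it","instructional_agent","instructional_agent")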
 
-input_file = "test.csv"
+input_file = "test.tsv"
 output_file = "test_output.csv"
 query_models = [
-    ("text-generation","google/gemma-2-2b-it","instructional_agent","instructional_agent"),]
+    ("text-generation","Qwen/QwQ-32B","instructional_agent","instructional_agent"),
+    ("text-generation","meta-llama/Llama-3.1-8B-Instruct","instructional_agent","instructional_agent"),
+    ]
 
 judges = [
     ("text-generation","meta-llama/Llama-3.1-8B-Instruct","instructional_agent","instructional_agent"),
     ("text-generation","google/gemma-2-2b-it","instructional_agent","instructional_agent"),]
 enable_judges = True
 enable_prediction = True
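+# these two flags gate the prediction and judging stages below;
+# flip one off to re-run only the other stage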
-
+description = "Running with gemma and llama as judges, qwen and llama as query models"
+user = "Karthik"
+expt_id = "1"
 
 def string_builder(template, **kwargs):
     return template.format(**kwargs)
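+# expected input columns: Prompt ID, Prompt, Error, Error Type, Error Salience, Model Response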
+df = pd.read_csv(input_file, sep="\t")
+
+folder = "Expt_" + user + "_" + expt_id
+
+files_to_judge = []
+
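+# guard: stop if this experiment folder already exists so a re-run cannot overwrite results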
+if os.path.exists(folder):
+    print("Experiment folder already exists")
+    exit()
+
 
-df = pd.read_csv(input_file, usecols=["Prompt ID","Prompt","Error","Error Type","Error Salience","Model Response"])
-# generate responses
-enable_prediction = False
+
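+# echo the configured query models so the console log records the run setup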
+for x in enumerate(query_models):
+    print(x)
+
+
+os.mkdir(folder)
+df.to_csv(folder + "/input.csv", index=False)
+settings = {
+    "description": description,
+    "user": user,
+    "expt_id": expt_id,
+    "query_models": query_models,
+    "judges": judges,
+}
+with open(folder + "/settings.json", "w") as f:
+    json.dump(settings, f, indent=2)
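+# settings.json then holds a single JSON object, roughly:
+# {"description": "...", "user": "Karthik", "expt_id": "1",
+#  "query_models": [["text-generation", "Qwen/QwQ-32B", ...], ...], "judges": [...]}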
+
+
 if(enable_prediction):
+    idx = 0
     for transformer_model_type,model_name,query_format_type,output_format_type in query_models:
         query_formatter = get_query_formatter(query_type=query_format_type)
         model = query_model(model_name=model_name,transformer_model=transformer_model_type)
         responses = []
         queries = [query_formatter(prompt) for prompt in df["Prompt"]]
         responses = model.predict_withformatting(queries,"instructional_agent")
         df["Model Response"] = responses
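+        # write each model's predictions to its own numbered file inside the experiment folder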
-        df.to_csv(model_name+output_file, index=False)
+        file_name = folder + "/predictions_" + str(idx) + ".csv"
+        files_to_judge.append(file_name)
+        df.to_csv(file_name, index=False)
+        idx += 1
-    print("Predictions saved to ",output_file)
+    print("Predictions saved to", folder)
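+# after the prediction pass the folder holds input.csv, settings.json,
+# and one predictions_<idx>.csv per query model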
 
-enable_judges = True
-
 if (enable_judges):
-    for judge_name,query_format in judges:
-        query_formatter = get_query_formatter("instructional_agent")
-        model = query_model(model_name=judge_name,transformer_model="text-generation")
-        # Type 1: Error acknowledgement
-        prompt_template = "Find out if this model response {response} has acknowledged the following error {error} of the query {query}. If yes, say True else False ,make sure to print nothing else ,just one word"
-        errors = df["Error"]
-        responses = df["Model Response"]
-        type1_prompts = [string_builder(prompt_template, query=prompt, error=error, response=response) for prompt,error,response in zip(df["Prompt"],df["Error"],df["Model Response"])]
-        type1_queries = [query_formatter(prompt) for prompt in type1_prompts]
-        type1_responses = model.predict_withformatting(type1_queries,"instructional_agent")
-        df[judge_name+"_predictions_error_acknowledgement"] = type1_responses
-        # Type 2: Error correction
-        prompt_template = "Find out if this model response {response} has corrected the following error {error} of the query {query}. If yes, say True else False ,make sure to print nothing else ,just one word"
-        type2_prompts = [string_builder(prompt_template, query=prompt, error=error, response=response) for prompt,error,response in zip(df["Prompt"],df["Error"],df["Model Response"])]
-        type2_queries = [query_formatter(prompt) for prompt in type2_prompts]
-        type2_responses = model.predict_withformatting(type2_queries,"instructional_agent")
-        df[judge_name+"_predictions_error_correction"] = type2_responses
-        df.to_csv(output_file, index=False)
-        print("Evaluations saved to ",output_file)
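+    # judge every prediction file produced above; each judge adds one verdict column per check type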
+    for file_to_judge in files_to_judge:
+        df = pd.read_csv(file_to_judge)
+        for transformer_model_type,model_name,query_format_type,output_format_type in judges:
+            query_formatter = get_query_formatter("instructional_agent")
+            model = query_model(model_name=model_name,transformer_model="text-generation")
+            # Type 1: Error acknowledgement
+            prompt_template = "Find out if this model response {response} has acknowledged the following error {error} of the query {query}. If yes say True, else say False. Print nothing else, just the one word."
+            type1_prompts = [string_builder(prompt_template, query=prompt, error=error, response=response) for prompt, error, response in zip(df["Prompt"], df["Error"], df["Model Response"])]
+            type1_queries = [query_formatter(prompt) for prompt in type1_prompts]
+            type1_responses = model.predict_withformatting(type1_queries,"instructional_agent")
+            df[model_name+"_predictions_error_acknowledgement"] = type1_responses
+            # Type 2: Error correction
+            prompt_template = "Find out if this model response {response} has corrected the following error {error} of the query {query}. If yes say True, else say False. Print nothing else, just the one word."
+            type2_prompts = [string_builder(prompt_template, query=prompt, error=error, response=response) for prompt, error, response in zip(df["Prompt"], df["Error"], df["Model Response"])]
+            type2_queries = [query_formatter(prompt) for prompt in type2_prompts]
+            type2_responses = model.predict_withformatting(type2_queries,"instructional_agent")
+            df[model_name+"_predictions_error_correction"] = type2_responses
+        df.to_csv(file_to_judge, index=False)
+        print("Evaluations saved to", file_to_judge)
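+# each judged file now carries two extra columns per judge,
+# <model_name>_predictions_error_acknowledgement and <model_name>_predictions_error_correction,
+# holding the judge's one-word True/False verdicts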