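"""Train a pairwise "which model won?" classifier over chatbot conversations.

A frozen DistilBERT encodes each conversation; its [CLS] vector is concatenated
with learned identity embeddings for the two competing models, and a small MLP
head predicts the winning model's id.
"""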
import datetime
import pandas as pd
import numpy as np
import torch
from torch import nn
from transformers import DistilBertTokenizer, DistilBertModel, DistilBertConfig
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
# Initialization and Data Preparation
embedding_dim = 128
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# Load the configuration for DistilBERT
config = DistilBertConfig(dropout=0.2, attention_dropout=0.2)
dbert_model = DistilBertModel.from_pretrained('distilbert-base-uncased', config=config)
# Freeze all the parameters in the DistilBERT model.
for param in dbert_model.parameters():
    param.requires_grad = False
# Load your dataset. Assuming it's a CSV file with columns 'conversation', 'model_a', 'model_b', 'winner'
datapath = 'pair_wise.csv'
df = pd.read_csv(datapath)
unique_model_names = list(set(df['model_a'].unique()).union(set(df['model_b'].unique())))
model2id = {model: idx for idx, model in enumerate(unique_model_names)}
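# Note: 'winner' is assumed to hold one of the two model names in the pair,
# so it maps through the same model2id table as model_a / model_b.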

class PairWiseDataSet(Dataset):
    def __init__(self, dataframe, tokenizer, model2id, max_length=512):
        self.dataframe = dataframe
        self.tokenizer = tokenizer
        self.model2id = model2id
        self.max_length = max_length

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        text = row['conversation']
        model_a_id = self.model2id[row['model_a']]
        model_b_id = self.model2id[row['model_b']]
        winner_id = self.model2id[row['winner']]
        encoding = self.tokenizer(
            text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )
        input_ids = encoding['input_ids'].squeeze(0)
        attention_mask = encoding['attention_mask'].squeeze(0)
        return input_ids, attention_mask, torch.tensor([model_a_id, model_b_id]), torch.tensor(winner_id)
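
# Illustrative shape check (commented out; assumes pair_wise.csv is present):
#   ids, mask, pair, label = PairWiseDataSet(df, tokenizer, model2id)[0]
#   ids.shape == (512,), mask.shape == (512,), pair.shape == (2,), label is a scalar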
# Split the dataset
train_df, test_df = train_test_split(df, test_size=0.1, random_state=42)
train_dataset = PairWiseDataSet(train_df, tokenizer, model2id)
test_dataset = PairWiseDataSet(test_df, tokenizer, model2id)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)
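# Each batch from the loaders: input_ids [32, 512], attention_mask [32, 512],
# model_pairs [32, 2], labels [32] (the last batch may be smaller).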

class PairwiseClassifier(nn.Module):
    def __init__(self, dbert_model, num_models, embedding_dim):
        super(PairwiseClassifier, self).__init__()
        self.dbert_model = dbert_model
        self.model_embeddings = nn.Embedding(num_models, embedding_dim)
        self.classifier = nn.Sequential(
            nn.Linear(dbert_model.config.hidden_size + 2 * embedding_dim, 768),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(768, num_models)
        )

    def forward(self, input_ids, attention_mask, model_pair_ids):
        outputs = self.dbert_model(input_ids=input_ids, attention_mask=attention_mask)
        pooled_output = outputs.last_hidden_state[:, 0]  # [CLS] token embedding
        model_embeddings = self.model_embeddings(model_pair_ids)
        model_embeddings = model_embeddings.view(model_embeddings.size(0), -1)
        combined = torch.cat((pooled_output, model_embeddings), 1)
        logits = self.classifier(combined)
        return logits
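
# Design note: the head scores every known model and cross-entropy is taken
# against the winner's id; at inference time one would typically compare only
# the two logits belonging to the models in the pair (see the sketch at the
# end of this file).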
# Prefer the Apple Metal GPU (MPS) when available; otherwise fall back to CPU.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(f"Using {device} device")
model = PairwiseClassifier(dbert_model, len(model2id), embedding_dim).to(device)
# Measure time for training
start_time = datetime.datetime.now()
# Training loop
epochs = 5 # Number of epochs
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001) # Define the optimizer
criterion = nn.CrossEntropyLoss() # Define the loss function
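# The frozen DistilBERT weights have requires_grad=False, so Adam effectively
# updates only the model-identity embeddings and the classifier head.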
for epoch in range(epochs):
    model.train()
    train_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    train_loop = tqdm(train_loader, position=0, leave=True, desc=f'Epoch {epoch+1}/{epochs} [Training]')
    for inputs, attention_mask, model_pairs, labels in train_loop:
        inputs, attention_mask, model_pairs, labels = inputs.to(device), attention_mask.to(device), model_pairs.to(device), labels.to(device)
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs, attention_mask, model_pairs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        train_loss += loss.item()
        _, preds = torch.max(outputs, dim=1)
        correct_predictions += torch.sum(preds == labels).item()
        total_predictions += labels.size(0)
        train_loop.set_postfix(loss=(train_loss / (train_loop.n + 1)))
    train_accuracy = correct_predictions / total_predictions

    # Validation loop
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    correct_predictions = 0
    total_predictions = 0
    val_loop = tqdm(test_loader, position=0, leave=True, desc=f'Epoch {epoch+1}/{epochs} [Validation]')
    with torch.no_grad():
        for inputs, attention_mask, model_pairs, labels in val_loop:
            inputs, attention_mask, model_pairs, labels = inputs.to(device), attention_mask.to(device), model_pairs.to(device), labels.to(device)
            outputs = model(inputs, attention_mask, model_pairs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, preds = torch.max(outputs, dim=1)
            correct_predictions += torch.sum(preds == labels).item()
            total_predictions += labels.size(0)
            val_loop.set_postfix(loss=(val_loss / (val_loop.n + 1)))
    val_accuracy = correct_predictions / total_predictions

    # Print formatted loss and accuracy
    print(f'\nEpoch {epoch+1} \t Training Loss: {train_loss / len(train_loader):.3f} \t Validation Loss: {val_loss / len(test_loader):.3f}')
    print(f'\t Training Accuracy: {train_accuracy:.3%} \t Validation Accuracy: {val_accuracy:.3%}')
# Save the model
torch.save(model.state_dict(), 'pairwise_classifier_model.pth')
print('Model saved to pairwise_classifier_model.pth')
# Measure training time
end_time = datetime.datetime.now()
print(f'Training completed in: {end_time - start_time}')
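
# --- Minimal inference sketch (illustrative, not part of the training run) ---
# Assumes the objects defined above (tokenizer, model2id, model, device) are
# still in scope; to score in a fresh process, rebuild the model first and load
# the saved weights:
#   model.load_state_dict(torch.load('pairwise_classifier_model.pth'))
def predict_winner(text, model_a, model_b):
    """Return the predicted winner's name for one conversation/pair."""
    enc = tokenizer(text, padding='max_length', truncation=True,
                    max_length=512, return_tensors='pt')
    pair = torch.tensor([[model2id[model_a], model2id[model_b]]])
    model.eval()
    with torch.no_grad():
        logits = model(enc['input_ids'].to(device),
                       enc['attention_mask'].to(device),
                       pair.to(device)).cpu()
    # Compare only the two logits belonging to the models in this pair.
    pair_logits = logits[0, pair[0]]
    return model_a if pair_logits[0] >= pair_logits[1] else model_b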