Batch size mismatch during loss computation #14812
Hello, I have a model that I pre-trained from scratch using a DenseNet architecture with an output size of 2048. I am fine-tuning it on a downstream task like this:

```python
class DownstreamTask(pl.LightningModule):
    def __init__(self, pre_model, lr=LR):
        super().__init__()
        self.network = pre_model                      # pre-trained DenseNet backbone
        self.fc = nn.Sequential(nn.Linear(2048, 22))  # downstream head
        self.learning_rate = lr

    def forward(self, x):
        features = self.network(x)
        features = features.view(features.size(0), -1)
        gaze = self.fc(features)
        return gaze

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.l1_loss(y_hat, y)
        self.log("my_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return {'loss': loss}

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)


# Load the pre-trained backbone.
model = DenseNet()
model.load_state_dict(torch.load(PATH))
model.eval()

train_loader = DataLoader(
    TrainLoader(data_dir, batch_size, num_workers),
    batch_size=batch_size,
    shuffle=True,
    num_workers=int(num_workers),
)

learner = DownstreamTask(model)
trainer = pl.Trainer(
    accelerator='gpu',
    devices=num_gpus,
    max_epochs=epochs,
    strategy='ddp',
    num_nodes=num_nodes,
)
trainer.fit(learner, train_loader)
```

Here is the error I got:
I have tried to resize/reshape but nothing seems to work. It looks like there is a batch size mismatch.
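A quick way to narrow this down is to pull a single batch outside the `Trainer` and print the shapes; a minimal sketch using the `train_loader` and `learner` defined above:

```python
# Inspect one batch outside the Trainer to see what the loader actually yields.
x, y = next(iter(train_loader))
print("x:", x.shape)      # e.g. [batch_size, C, H, W]
print("y:", y.shape)      # should be [batch_size, 22] to match nn.Linear(2048, 22)

with torch.no_grad():
    y_hat = learner(x)
print("y_hat:", y_hat.shape)  # expected: [batch_size, 22]
```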
@Estabi I believe this is irrelevant to PL. Have you checked that the sizes of `y_hat` and `y` match? See the PyTorch docs: https://pytorch.org/docs/1.12/generated/torch.nn.functional.l1_loss.html#torch.nn.functional.l1_loss
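The check the reply has in mind can be illustrated with a standalone sketch (illustrative sizes, not taken from the thread): `F.l1_loss` accepts matching shapes, broadcasts mismatched-but-compatible shapes with a `UserWarning` (and then computes a misleading loss), and raises a `RuntimeError` when the shapes cannot broadcast, which is the usual source of a size mismatch error here.

```python
import torch
import torch.nn.functional as F

pred = torch.randn(8, 22)                  # stand-in for y_hat: [batch, 22]

# Matching shapes: the intended case.
print(F.l1_loss(pred, torch.randn(8, 22)))

# Broadcastable but mismatched target: emits a UserWarning and computes a
# loss that is almost certainly not what you want.
print(F.l1_loss(pred, torch.randn(8, 1)))

# Non-broadcastable target: raises a RuntimeError about mismatched sizes.
try:
    F.l1_loss(pred, torch.randn(8))
except RuntimeError as err:
    print("shape error:", err)
```

If, for example, `y` comes out of the dataset as `[batch]` or `[batch, 22, 1]` while `y_hat` is `[batch, 22]`, reshaping the targets (or fixing the dataset's `__getitem__`) so both are `[batch, 22]` before calling `F.l1_loss` would resolve the mismatch.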