Skip to content

Commit d115526

Browse files
Commit message: Replace logged loss .item() with .detach().item() (#2584)
Parent commit: 0445bc2 — this commit: d115526

13 files changed

+17
-17
lines changed

Diff for: recipes/dev/early_exit_finetune_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -961,7 +961,7 @@ def train(self) -> None:
961961
# Update the number of steps when the weights are updated
962962
self.global_step += 1
963963

964-
loss_to_log = running_loss.item() / num_tokens
964+
loss_to_log = running_loss.detach().item() / num_tokens
965965
pbar.update(1)
966966
pbar.set_description(
967967
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/dev/lora_finetune_distributed_multi_dataset.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -866,7 +866,7 @@ def train(self) -> None:
866866
# Update the number of steps when the weights are updated
867867
self.global_step += 1
868868

869-
loss_to_log = running_loss.item() / num_tokens
869+
loss_to_log = running_loss.detach().item() / num_tokens
870870
pbar.update(1)
871871
pbar.set_description(
872872
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/full_dpo_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -981,7 +981,7 @@ def train(self) -> None:
981981
# Step the learning rate scheduler
982982
if self._lr_scheduler is not None:
983983
self._lr_scheduler.step()
984-
loss_to_log = running_loss.item()
984+
loss_to_log = running_loss.detach().item()
985985
pbar.update(1)
986986
pbar.set_description(
987987
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/full_finetune_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -851,7 +851,7 @@ def train(self) -> None:
851851
if self._lr_scheduler is not None:
852852
self._lr_scheduler.step()
853853

854-
loss_to_log = running_loss.item() / num_tokens
854+
loss_to_log = running_loss.detach().item() / num_tokens
855855
pbar.update(1)
856856
pbar.set_description(
857857
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/full_finetune_single_device.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def train(self) -> None:
717717
self._lr_scheduler.step()
718718
self.global_step += 1
719719

720-
loss_to_log = running_loss.item() / num_tokens
720+
loss_to_log = running_loss.detach().item() / num_tokens
721721
pbar.update(1)
722722
pbar.set_description(
723723
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/knowledge_distillation_distributed.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -881,16 +881,16 @@ def train(self) -> None:
881881
# Manually scale the gradients from unnormalized loss by total # of tokens
882882
# We multiply by world_size to undo FSDP2 gradient normalization.
883883
training.scale_grads(self._model, self.world_size / num_tokens)
884-
class_loss_to_log = running_class_loss.item() / num_tokens
885-
kd_loss_to_log = running_kd_loss.item() / num_tokens
884+
class_loss_to_log = running_class_loss.detach().item() / num_tokens
885+
kd_loss_to_log = running_kd_loss.detach().item() / num_tokens
886886
self._optimizer.step()
887887
self._optimizer.zero_grad(set_to_none=True)
888888
self._lr_scheduler.step()
889889
# Update the number of steps when the weights are updated
890890
self.global_step += 1
891891

892-
class_loss_to_log = class_loss.item()
893-
kd_loss_to_log = kd_loss.item()
892+
class_loss_to_log = class_loss.detach().item()
893+
kd_loss_to_log = kd_loss.detach().item()
894894
loss_to_log = (
895895
1 - self._kd_ratio
896896
) * class_loss_to_log + self._kd_ratio * kd_loss_to_log

Diff for: recipes/knowledge_distillation_single_device.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -738,8 +738,8 @@ def train(self) -> None:
738738
# Update the number of steps when the weights are updated
739739
self.global_step += 1
740740

741-
class_loss_to_log = running_class_loss.item() / num_tokens
742-
kd_loss_to_log = running_kd_loss.item() / num_tokens
741+
class_loss_to_log = running_class_loss.detach().item() / num_tokens
742+
kd_loss_to_log = running_kd_loss.detach().item() / num_tokens
743743
loss_to_log = (
744744
1 - self._kd_ratio
745745
) * class_loss_to_log + self._kd_ratio * kd_loss_to_log

Diff for: recipes/lora_dpo_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -775,7 +775,7 @@ def train(self) -> None:
775775
# Update the number of steps when the weights are updated
776776
self.global_step += 1
777777

778-
loss_to_log = running_loss.item()
778+
loss_to_log = running_loss.detach().item()
779779
pbar.update(1)
780780
pbar.set_description(
781781
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/lora_dpo_single_device.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ def train(self) -> None:
579579
# Update the number of steps when the weights are updated
580580
self.global_step += 1
581581

582-
loss_to_log = running_loss.item()
582+
loss_to_log = running_loss.detach().item()
583583
pbar.update(1)
584584
pbar.set_description(
585585
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/lora_finetune_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -832,7 +832,7 @@ def train(self) -> None:
832832
# Update the number of steps when the weights are updated
833833
self.global_step += 1
834834

835-
loss_to_log = running_loss.item() / num_tokens
835+
loss_to_log = running_loss.detach().item() / num_tokens
836836
pbar.update(1)
837837
pbar.set_description(
838838
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/lora_finetune_single_device.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -720,7 +720,7 @@ def train(self) -> None:
720720
# Update the number of steps when the weights are updated
721721
self.global_step += 1
722722

723-
loss_to_log = running_loss.item() / num_tokens
723+
loss_to_log = running_loss.detach().item() / num_tokens
724724
pbar.update(1)
725725
pbar.set_description(
726726
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/qat_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -865,7 +865,7 @@ def train(self) -> None:
865865
# Update the number of steps when the weights are updated
866866
self.global_step += 1
867867

868-
loss_to_log = running_loss.item() / num_tokens
868+
loss_to_log = running_loss.detach().item() / num_tokens
869869
pbar.update(1)
870870
pbar.set_description(
871871
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Diff for: recipes/qat_lora_finetune_distributed.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -882,7 +882,7 @@ def train(self) -> None:
882882
# Update the number of steps when the weights are updated
883883
self.global_step += 1
884884

885-
loss_to_log = running_loss.item() / num_tokens
885+
loss_to_log = running_loss.detach().item() / num_tokens
886886
pbar.update(1)
887887
pbar.set_description(
888888
f"{curr_epoch + 1}|{self.global_step}|Loss: {loss_to_log}"

Commit comments (0)