File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -2814,6 +2814,19 @@ def zero_grad(self):
28142814 for param_name , param in self .module .named_parameters ():
28152815 param .grad = None
28162816
def _eigenvalue_summary_events(self):
    """Build the monitor events describing the per-block eigenvalues.

    Returns:
        An empty list when eigenvalue tracking is disabled, or when
        ``self.gas_boundary_ctr`` is not a multiple of
        ``self.eigenvalue_gas_boundary_resolution()``. Otherwise a list
        with one ``(tag, value, self.global_samples)`` tuple per entry of
        ``self.block_eigenvalue`` (in iteration order), where ``value`` is
        the first element of that entry and ``tag`` ends with the entry's
        positional index.
    """
    # Check the feature flag first so the modulo below is never evaluated
    # when eigenvalue tracking is turned off.
    if not self.eigenvalue_enabled():
        return []
    # A non-zero remainder means this step is not on a reporting boundary.
    if self.gas_boundary_ctr % self.eigenvalue_gas_boundary_resolution():
        return []

    # The tag must end right after the index (no trailing whitespace) so
    # that it matches the names consumers of these events look up.
    return [
        (
            f"Train/Eigenvalues/ModelBlockParam_{i}",
            ev_value[0],
            self.global_samples,
        )
        for i, ev_value in enumerate(self.block_eigenvalue.values())
    ]
2829+
28172830 def clip_fp32_gradients (self ):
28182831 clip_grad_norm_ (parameters = self .module .parameters (), max_norm = self .gradient_clipping (), mpu = self .mpu )
28192832
@@ -2963,15 +2976,7 @@ def step(self, lr_kwargs=None):
29632976 self .global_samples ,
29642977 ))
29652978
2966- if (self .eigenvalue_enabled ()
2967- and not self .gas_boundary_ctr % self .eigenvalue_gas_boundary_resolution ()):
2968- ev_values = self .block_eigenvalue .values ()
2969- for i in range (len (ev_values )):
2970- self .summary_events .append ((
2971- f"Train/Eigenvalues/ModelBlockParam_{ i } " ,
2972- self .ev_values [i ][0 ],
2973- self .global_samples ,
2974- ))
2979+ self .summary_events .extend (self ._eigenvalue_summary_events ())
29752980 self .monitor .write_events (self .summary_events )
29762981
29772982 # Check flops profiling
Original file line number Diff line number Diff line change 1+ # Copyright (c) Microsoft Corporation.
2+ # SPDX-License-Identifier: Apache-2.0
3+
4+ from deepspeed .runtime .engine import DeepSpeedEngine
5+
6+
def test_eigenvalue_summary_events_use_block_eigenvalue_values():
    """On a reporting boundary, one event is produced per block eigenvalue."""
    # Bypass __init__ so no distributed/runtime setup is triggered; only the
    # attributes read by the method under test are populated.
    stub = object.__new__(DeepSpeedEngine)
    stub.eigenvalue_enabled = lambda: True
    stub.eigenvalue_gas_boundary_resolution = lambda: 2
    stub.gas_boundary_ctr = 4  # 4 % 2 == 0, i.e. a reporting boundary
    stub.global_samples = 128
    stub.block_eigenvalue = {"block_a": (0.25, 0), "block_b": (0.5, 1)}

    expected_events = [
        ("Train/Eigenvalues/ModelBlockParam_0", 0.25, 128),
        ("Train/Eigenvalues/ModelBlockParam_1", 0.5, 128),
    ]

    assert stub._eigenvalue_summary_events() == expected_events
22+
23+
def test_eigenvalue_summary_events_skip_non_boundary_steps():
    """Off a reporting boundary, no eigenvalue events are produced."""
    # Bypass __init__; only the attributes consulted before the early return
    # need to exist on the instance.
    stub = object.__new__(DeepSpeedEngine)
    stub.eigenvalue_enabled = lambda: True
    stub.eigenvalue_gas_boundary_resolution = lambda: 2
    stub.gas_boundary_ctr = 3  # 3 % 2 != 0, i.e. not a reporting boundary
    stub.block_eigenvalue = {"block_a": (0.25, 0)}

    produced = stub._eigenvalue_summary_events()

    assert produced == []
You can’t perform that action at this time.
0 commit comments