
Commit eb07d41

nelyahu, loadams, tjruwase, and jomayeri authored
Allow accelerator to instantiate the device (#5255)

When instantiating torch.device for HPU, it cannot be given an "hpu:1" annotation, only "hpu". Moving this logic into the accelerator solves the issue with a single-line change.

---------

Co-authored-by: Logan Adams <[email protected]>
Co-authored-by: Olatunji Ruwase <[email protected]>
Co-authored-by: Joe Mayer <[email protected]>

1 parent 4ba49dd commit eb07d41
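
A minimal sketch of the pattern this commit adopts (the class names below are hypothetical illustrations, not DeepSpeed code): callers ask the accelerator for a torch.device via device(), and each backend's device_name() decides whether a device index appears in the string, so an HPU-style backend can drop the index entirely.

    import torch

    class ExampleHpuAccelerator:
        # Hypothetical stand-in for an HPU-style accelerator: the index is ignored
        # because the backend only accepts the bare "hpu" device string.
        def device_name(self, device_index=None):
            return 'hpu'

        def device(self, device_index=None):
            return torch.device(self.device_name(device_index))

    class ExampleCudaAccelerator:
        # Hypothetical CUDA-style counterpart for contrast: the index, when given,
        # is folded into the device string.
        def device_name(self, device_index=None):
            return 'cuda' if device_index is None else 'cuda:{}'.format(device_index)

        def device(self, device_index=None):
            return torch.device(self.device_name(device_index))

    print(ExampleHpuAccelerator().device(1))   # device(type='hpu')
    print(ExampleCudaAccelerator().device(1))  # device(type='cuda', index=1)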

File tree

4 files changed
+4 -7 lines changed

.github/workflows/hpu-gaudi2.yml (-1)

@@ -68,7 +68,6 @@ jobs:
   (test_flops_profiler.py and test_flops_profiler_in_inference)
   test_get_optim_files.py
   test_groups.py
-  test_init_on_device.py
   test_partition_balanced.py
   (test_adamw.py and TestAdamConfigs)
   test_coalesced_collectives.py

accelerator/hpu_accelerator.py (+2 -3)

@@ -42,9 +42,8 @@ def handles_memory_backpressure(self):
         return True
 
     def device_name(self, device_index=None):
-        if device_index is None:
-            return 'hpu'
-        return 'hpu:{}'.format(device_index)
+        # ignoring device_index.
+        return 'hpu'
 
     def device(self, device_index=None):
         return torch.device(self.device_name(device_index))

deepspeed/runtime/engine.py (+2 -2)

@@ -1009,13 +1009,13 @@ def _set_distributed_vars(self, args):
         device_rank = args.device_rank if args is not None and hasattr(args, 'device_rank') else self.local_rank
         if device_rank >= 0:
             get_accelerator().set_device(device_rank)
-            self.device = torch.device(get_accelerator().device_name(), device_rank)
+            self.device = torch.device(get_accelerator().device_name(device_rank))
             self.world_size = dist.get_world_size()
             self.global_rank = dist.get_rank()
         else:
             self.world_size = 1
             self.global_rank = 0
-            self.device = torch.device(get_accelerator().device_name())
+            self.device = get_accelerator().device()
 
     # Configure based on command line arguments
     def _configure_with_arguments(self, args, mpu):
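
A small, self-contained illustration of the engine.py change above (hpu_device_name is a hypothetical helper mirroring the new HPU accelerator behavior, not DeepSpeed code): the old call attached the rank as a second torch.device argument, producing an indexed device that the HPU backend cannot consume, while the new call hands the rank to the accelerator, which is free to ignore it.

    import torch

    def hpu_device_name(device_index=None):
        # Assumed post-commit behavior: the index is ignored for HPU.
        return 'hpu'

    rank = 1
    old_device = torch.device(hpu_device_name(), rank)  # device(type='hpu', index=1)
    new_device = torch.device(hpu_device_name(rank))    # device(type='hpu')
    print(old_device, new_device)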

deepspeed/runtime/zero/utils.py (-1)

@@ -68,7 +68,6 @@ def get_lst_from_rank0(lst: List[int]) -> None:
     lst_tensor = torch.tensor(
         lst if dist.get_rank() == 0 else [-1] * len(lst),
         dtype=int,
-        # device=get_accelerator().current_device_name(),
         device=torch.device(get_accelerator().device_name(os.environ["LOCAL_RANK"])),
         requires_grad=False,
     )
