
Commit 6215cc1

[Improvement] Add seed option for sampler (open-mmlab#4665)
1 parent: f52dac8

3 files changed (+21 -9 lines)


mmdet/datasets/builder.py (+3 -3)

@@ -106,11 +106,11 @@ def build_dataloader(dataset,
         # DistributedGroupSampler will definitely shuffle the data to satisfy
         # that images on each GPU are in the same group
         if shuffle:
-            sampler = DistributedGroupSampler(dataset, samples_per_gpu,
-                                              world_size, rank)
+            sampler = DistributedGroupSampler(
+                dataset, samples_per_gpu, world_size, rank, seed=seed)
         else:
             sampler = DistributedSampler(
-                dataset, world_size, rank, shuffle=False)
+                dataset, world_size, rank, shuffle=False, seed=seed)
         batch_size = samples_per_gpu
         num_workers = workers_per_gpu
     else:
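
For context, a hedged usage sketch of how the new argument is expected to reach the sampler when a training dataloader is built. Only samples_per_gpu, workers_per_gpu, shuffle and seed are visible in the hunk above; cfg, build_dataset and the dist flag are assumptions about the surrounding builder API rather than something this diff shows.

from mmdet.datasets import build_dataloader, build_dataset

dataset = build_dataset(cfg.data.train)  # cfg: hypothetical mmdet config object
data_loader = build_dataloader(
    dataset,
    samples_per_gpu=2,
    workers_per_gpu=2,
    dist=True,     # assumed switch for the distributed branch shown above
    shuffle=True,  # selects DistributedGroupSampler in that branch
    seed=42)       # now forwarded to the sampler via seed=seed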

mmdet/datasets/samplers/distributed_sampler.py (+11 -4)

@@ -6,15 +6,22 @@

 class DistributedSampler(_DistributedSampler):

-    def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True):
-        super().__init__(dataset, num_replicas=num_replicas, rank=rank)
-        self.shuffle = shuffle
+    def __init__(self,
+                 dataset,
+                 num_replicas=None,
+                 rank=None,
+                 shuffle=True,
+                 seed=0):
+        super().__init__(
+            dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle)
+        # for the compatibility from PyTorch 1.3+
+        self.seed = seed if seed is not None else 0

     def __iter__(self):
         # deterministically shuffle based on epoch
         if self.shuffle:
             g = torch.Generator()
-            g.manual_seed(self.epoch)
+            g.manual_seed(self.epoch + self.seed)
             indices = torch.randperm(len(self.dataset), generator=g).tolist()
         else:
             indices = torch.arange(len(self.dataset)).tolist()
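
The change is easier to see in isolation. The standalone sketch below (plain PyTorch, not mmdet code) mirrors the seeding scheme in the patched __iter__: the permutation is a pure function of epoch + seed, so every rank given the same seed reproduces the identical order for a given epoch, while different epochs still shuffle differently.

import torch

def epoch_indices(num_samples, epoch, seed=0):
    # Same formula as the patch: the generator is seeded with epoch + seed.
    g = torch.Generator()
    g.manual_seed(epoch + seed)
    return torch.randperm(num_samples, generator=g).tolist()

# Identical (epoch, seed) pairs always give identical index orders ...
assert epoch_indices(10, epoch=3, seed=42) == epoch_indices(10, epoch=3, seed=42)
# ... while bumping the epoch (as set_epoch does each epoch) reshuffles.
print(epoch_indices(10, epoch=3, seed=42))
print(epoch_indices(10, epoch=4, seed=42))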

mmdet/datasets/samplers/group_sampler.py (+7 -2)

@@ -64,13 +64,17 @@ class DistributedGroupSampler(Sampler):
         num_replicas (optional): Number of processes participating in
             distributed training.
         rank (optional): Rank of the current process within num_replicas.
+        seed (int, optional): random seed used to shuffle the sampler if
+            ``shuffle=True``. This number should be identical across all
+            processes in the distributed group. Default: 0.
     """

     def __init__(self,
                  dataset,
                  samples_per_gpu=1,
                  num_replicas=None,
-                 rank=None):
+                 rank=None,
+                 seed=0):
         _rank, _num_replicas = get_dist_info()
         if num_replicas is None:
             num_replicas = _num_replicas
@@ -81,6 +85,7 @@ def __init__(self,
         self.num_replicas = num_replicas
         self.rank = rank
         self.epoch = 0
+        self.seed = seed if seed is not None else 0

         assert hasattr(self.dataset, 'flag')
         self.flag = self.dataset.flag
@@ -96,7 +101,7 @@ def __init__(self,
     def __iter__(self):
         # deterministically shuffle based on epoch
         g = torch.Generator()
-        g.manual_seed(self.epoch)
+        g.manual_seed(self.epoch + self.seed)

         indices = []
         for i, size in enumerate(self.group_sizes):
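
A hedged, self-contained sketch of constructing this sampler directly with the new argument. The constructor signature follows the hunks above and the import path mirrors this file's path; the set_epoch hook and the ToyDataset stand-in are assumptions for illustration, not something this diff shows.

import numpy as np
from mmdet.datasets.samplers.group_sampler import DistributedGroupSampler

class ToyDataset:
    """Hypothetical stand-in: the sampler only needs __len__ and a `flag`
    array assigning each sample to a group (see the assert in the hunk above)."""

    def __init__(self, n=10):
        self.flag = np.array([i % 2 for i in range(n)], dtype=np.uint8)

    def __len__(self):
        return len(self.flag)

dataset = ToyDataset()
# As the added docstring says, the seed must be identical on every rank:
# identical seeds give one identical global shuffle that is then sharded.
sampler = DistributedGroupSampler(
    dataset, samples_per_gpu=2, num_replicas=2, rank=0, seed=42)
sampler.set_epoch(0)  # assumed hook; mixes the epoch into manual_seed(epoch + seed)
print(list(sampler))  # indices this rank would load in epoch 0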
