RachelXu7
diff --git a/‎benchmark/README.md
+65 b/‎benchmark/README.md
+65
diff --git a/‎benchmark/deeplabv3p.yml
+2-1 b/‎benchmark/deeplabv3p.yml
+2-1
diff --git a/‎benchmark/hrnet.yml
+4-1 b/‎benchmark/hrnet.yml
+4-1
diff --git a/‎benchmark/hrnet48.yml
+51 b/‎benchmark/hrnet48.yml
+51
diff --git a/‎legacy/configs/benchmark/deeplabv3p_resnet50_vd_cityscapes.yaml
+51 b/‎legacy/configs/benchmark/deeplabv3p_resnet50_vd_cityscapes.yaml
+51
diff --git a/‎legacy/configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml
+53 b/‎legacy/configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml
+53
diff --git a/‎legacy/pdseg/eval.py
+25-19 b/‎legacy/pdseg/eval.py
+25-19
@@ -0,0 +1,65 @@
+# PaddleSeg Benchmark with AMP
+
+## 动态图
+
+数据集 cityscapes 放置于 data 目录下（即 data/cityscapes）。
+
+通过 **--fp16** 开启 AMP（自动混合精度）训练。
+
+单机单卡使用如下命令进行训练：
+```
+export CUDA_VISIBLE_DEVICES=0
+python train.py --config benchmark/hrnet.yml --iters 2000 --log_iters 10 --fp16
+```
+
+单机多卡使用如下命令进行训练：
+```
+export CUDA_VISIBLE_DEVICES=0,1
+python -m paddle.distributed.launch train.py --config benchmark/hrnet.yml --iters 2000 --log_iters 10 --fp16
+# fleet开启多卡训练
+fleetrun train.py --config benchmark/hrnet.yml --iters 2000 --log_iters 10 --fp16
+```
+
+DeepLabv3+ 模型的配置文件为：
+benchmark/deeplabv3p.yml
+
+**注意**
+
+* 动态图中batch_size设置为每卡的batch_size
+* DeepLabv3+ 支持通过传入 **--data_format NHWC** 进行 NHWC 数据格式的训练。
+
+
+
+## 静态图
+数据集 cityscapes 放置于 legacy/dataset 目录下（即 legacy/dataset/cityscapes）。
+
+通过 **MODEL.FP16 True** 开启 AMP（自动混合精度）训练。
+单机单卡使用如下命令进行训练：
+```
+cd legacy
+export CUDA_VISIBLE_DEVICES=0
+python pdseg/train.py --cfg configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml --use_gpu  --use_mpio --log_steps 10 BATCH_SIZE 2 SOLVER.NUM_EPOCHS 3 MODEL.FP16 True
+```
+
+单机多卡使用如下命令进行训练：
+```
+export CUDA_VISIBLE_DEVICES=0,1
+fleetrun pdseg/train.py --cfg configs/benchmark/hrnetw18_cityscapes_1024x512_215.yaml --use_gpu  --use_mpio --log_steps 10 BATCH_SIZE 4 SOLVER.NUM_EPOCHS 3 MODEL.FP16 True
+```
+
+DeepLabv3+ 模型的配置文件为：
+configs/benchmark/deeplabv3p_resnet50_vd_cityscapes.yaml
+
+**注意**
+* 静态图中的 BATCH_SIZE 为所有卡合计的总 batch size（而非每卡的 batch size）。
+
+## 竞品
+竞品为[mmsegmentation](https://github.com/open-mmlab/mmsegmentation)
+
+对应竞品配置文件为：
+
+configs/hrnet/fcn_hr18_512x1024_80k_cityscapes.py
+
+configs/deeplabv3plus/deeplabv3plus_r50-d8_512x1024_80k_cityscapes.py
+
+相关执行方式请参考其官方仓库。
@@ -1,5 +1,5 @@
 batch_size: 2
-iters: 500
+iters: 80000
 
 train_dataset:
   type: Cityscapes
@@ -29,6 +29,7 @@ model:
     type: ResNet50_vd
     output_stride: 8
     multi_grid: [1, 2, 4]
+    pretrained: https://bj.bcebos.com/paddleseg/dygraph/resnet50_vd_ssld_v2.tar.gz
   num_classes: 19
   backbone_indices: [0, 3]
   aspp_ratios: [1, 12, 24, 36]
 
@@ -1,5 +1,5 @@
 batch_size: 2
-iters: 500
+iters: 80000
 
 train_dataset:
   type: Cityscapes
@@ -27,8 +27,11 @@ model:
   type: FCN
   backbone:
     type: HRNet_W18
+    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w18_ssld.tar.gz
+    padding_same: False
   num_classes: 19
   backbone_indices: [-1]
+  bias: False
 
 optimizer:
   type: sgd
 
@@ -0,0 +1,51 @@
+batch_size: 2
+iters: 80000
+
+train_dataset:
+  type: Cityscapes
+  dataset_root: data/cityscapes
+  transforms:
+    - type: ResizeStepScaling
+      min_scale_factor: 0.5
+      max_scale_factor: 2.0
+      scale_step_size: 0.25
+    - type: RandomPaddingCrop
+      crop_size: [1024, 512]
+    - type: RandomHorizontalFlip
+    - type: RandomDistort
+    - type: Normalize
+  mode: train
+
+val_dataset:
+  type: Cityscapes
+  dataset_root: data/cityscapes
+  transforms:
+    - type: Normalize
+  mode: val
+
+model:
+  type: FCN
+  backbone:
+    type: HRNet_W48
+    pretrained: https://bj.bcebos.com/paddleseg/dygraph/hrnet_w48_ssld.tar.gz
+    padding_same: False
+  num_classes: 19
+  backbone_indices: [-1]
+  bias: False
+
+optimizer:
+  type: sgd
+  weight_decay: 0.0005
+
+learning_rate:
+  value: 0.01
+  decay:
+    type: poly
+    power: 0.9
+    end_lr: 0.0
+
+loss:
+  types:
+    - type: CrossEntropyLoss
+      ignore_index: 255
+  coef: [1]
@@ -0,0 +1,51 @@
+EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding, rangescaling and stepscaling
+TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding, rangescaling and stepscaling
+AUG:
+    AUG_METHOD: "stepscaling" # choices: unpadding, rangescaling or stepscaling
+    FIX_RESIZE_SIZE: (2048, 1024) # (width, height), for unpadding
+    INF_RESIZE_VALUE: 500  # for rangescaling
+    MAX_RESIZE_VALUE: 600  # for rangescaling
+    MIN_RESIZE_VALUE: 400  # for rangescaling
+    MAX_SCALE_FACTOR: 2.0  # for stepscaling
+    MIN_SCALE_FACTOR: 0.5  # for stepscaling
+    SCALE_STEP_SIZE: 0.25  # for stepscaling
+    MIRROR: True
+    TO_RGB: True
+BATCH_SIZE: 8
+DATASET:
+    DATA_DIR: "./dataset/cityscapes/"
+    IMAGE_TYPE: "rgb"  # choice rgb or rgba
+    NUM_CLASSES: 19
+    TEST_FILE_LIST: "dataset/cityscapes/val.list"
+    TRAIN_FILE_LIST: "dataset/cityscapes/train.list"
+    VAL_FILE_LIST: "dataset/cityscapes/val.list"
+    IGNORE_INDEX: 255
+    SEPARATOR: " "
+FREEZE:
+    MODEL_FILENAME: "__model__"
+    PARAMS_FILENAME: "__params__"
+MODEL:
+    DEFAULT_NORM_TYPE: "bn"
+    MODEL_NAME: "deeplabv3p"
+    DEEPLAB:
+        ASPP_WITH_SEP_CONV: True
+        DECODER_USE_SEP_CONV: True
+        BACKBONE: "resnet_vd_50"
+        OUTPUT_STRIDE: 8
+        BIAS: null
+        ALIGN_CORNERS: False
+        BENCHMARK: True
+        DECODER:
+            ACT: False
+TRAIN:
+    PRETRAINED_MODEL_DIR: u"pretrained_model/resnet50_vd_imagenet"
+    MODEL_SAVE_DIR: "output/deeplabv3p_resnet50_vd_bn_cityscapes"
+    SNAPSHOT_EPOCH: 10
+    SYNC_BATCH_NORM: True
+TEST:
+    TEST_MODEL: "output/deeplabv3p_resnet50_vd_bn_cityscapes/final"
+SOLVER:
+    LR: 0.01
+    LR_POLICY: "poly"
+    OPTIMIZER: "sgd"
+    NUM_EPOCHS: 215
@@ -0,0 +1,53 @@
+EVAL_CROP_SIZE: (2048, 1024) # (width, height), for unpadding, rangescaling and stepscaling
+TRAIN_CROP_SIZE: (1024, 512) # (width, height), for unpadding, rangescaling and stepscaling
+AUG:
+#    AUG_METHOD: "unpadding" # choices: unpadding, rangescaling or stepscaling
+    AUG_METHOD: "stepscaling" # choices: unpadding, rangescaling or stepscaling
+    FIX_RESIZE_SIZE: (1024, 512) # (width, height), for unpadding
+    INF_RESIZE_VALUE: 500  # for rangescaling
+    MAX_RESIZE_VALUE: 600  # for rangescaling
+    MIN_RESIZE_VALUE: 400  # for rangescaling
+    MAX_SCALE_FACTOR: 2.0  # for stepscaling
+    MIN_SCALE_FACTOR: 0.5  # for stepscaling
+    SCALE_STEP_SIZE: 0.25  # for stepscaling
+    MIRROR: True
+BATCH_SIZE: 8
+
+DATASET:
+    DATA_DIR: "./dataset/cityscapes/"
+    IMAGE_TYPE: "rgb"  # choice rgb or rgba
+    NUM_CLASSES: 19
+    TEST_FILE_LIST: "./dataset/cityscapes/val.list"
+    TRAIN_FILE_LIST: "./dataset/cityscapes/train.list"
+    VAL_FILE_LIST: "./dataset/cityscapes/val.list"
+    IGNORE_INDEX: 255
+    SEPARATOR: " "
+
+MODEL:
+    MODEL_NAME: "hrnet"
+    DEFAULT_NORM_TYPE: "bn"
+    HRNET:
+        STAGE2:
+            NUM_CHANNELS: [18, 36]
+        STAGE3:
+            NUM_CHANNELS: [18, 36, 72]
+        STAGE4:
+            NUM_CHANNELS: [18, 36, 72, 144]
+        BIAS: False
+        ALIGN_CORNERS: False
+
+TRAIN:
+    PRETRAINED_MODEL_DIR: u"./pretrained_model/hrnet_w18_ssld"
+    MODEL_SAVE_DIR: "output/hrnetw18_bn_cityscapes"
+    SNAPSHOT_EPOCH: 10
+    SYNC_BATCH_NORM: True
+
+TEST:
+    TEST_MODEL: "output/hrnetw18_bn_cityscapes/best_model"
+
+SOLVER:
+    LR: 0.01
+    LR_POLICY: "poly"
+    WEIGHT_DECAY: 5.0e-4
+    OPTIMIZER: "sgd"
+    NUM_EPOCHS: 215
@@ -24,13 +24,15 @@
 import sys
 import argparse
 import pprint
+import time
+
 import numpy as np
 import paddle
-import paddle.fluid as fluid
+import paddle.static as static
 
 from utils import paddle_utils
 from utils.config import cfg
-from utils.timer import Timer, calculate_eta
+from utils.timer import TimeAverager, calculate_eta
 from models.model_builder import build_model
 from models.model_builder import ModelPhase
 from reader import SegDataset
@@ -82,8 +84,8 @@ def evaluate(cfg,
              **kwargs):
     np.set_printoptions(precision=5, suppress=True)
 
-    startup_prog = fluid.Program()
-    test_prog = fluid.Program()
+    startup_prog = static.Program()
+    test_prog = static.Program()
     dataset = SegDataset(
         file_list=cfg.DATASET.VAL_FILE_LIST,
         mode=ModelPhase.EVAL,
@@ -109,17 +111,17 @@ def data_generator():
 
     # Get device environment
     if use_gpu:
-        places = fluid.cuda_places()
+        places = static.cuda_places()
     elif use_xpu:
         xpu_id = int(os.environ.get('FLAGS_selected_xpus', 0))
-        places = [fluid.XPUPlace(xpu_id)]
+        places = [paddle.XPUPlace(xpu_id)]
     else:
-        places = fluid.cpu_places()
+        places = static.cpu_places()
     place = places[0]
     dev_count = len(places)
     print("#Device count: {}".format(dev_count))
 
-    exe = fluid.Executor(place)
+    exe = static.Executor(place)
     exe.run(startup_prog)
 
     test_prog = test_prog.clone(for_test=True)
@@ -132,9 +134,9 @@ def data_generator():
     if ckpt_dir is not None:
         print('load test model:', ckpt_dir)
         try:
-            fluid.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
+            static.load(test_prog, os.path.join(ckpt_dir, 'model'), exe)
         except:
-            fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
+            paddle.fluid.io.load_params(exe, ckpt_dir, main_program=test_prog)
 
     # Use streaming confusion matrix to calculate mean_iou
     np.set_printoptions(
@@ -144,11 +146,13 @@ def data_generator():
     num_images = 0
     step = 0
     all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
-    timer = Timer()
-    timer.start()
+    reader_cost_averager = TimeAverager()
+    batch_cost_averager = TimeAverager()
+    batch_start = time.time()
     data_loader.start()
     while True:
         try:
+            reader_cost_averager.record(time.time() - batch_start)
             step += 1
             loss, pred, grts, masks = exe.run(
                 test_prog, fetch_list=fetch_list, return_numpy=True)
@@ -160,15 +164,17 @@ def data_generator():
             _, iou = conf_mat.mean_iou()
             _, acc = conf_mat.accuracy()
 
-            speed = 1.0 / timer.elapsed_time()
-
+            batch_cost_averager.record(
+                time.time() - batch_start, num_samples=cfg.BATCH_SIZE)
+            batch_cost = batch_cost_averager.get_average()
+            reader_cost = reader_cost_averager.get_average()
+            eta = calculate_eta(all_step - step, batch_cost)
             print(
-                "[EVAL]step: {} loss: {:.5f} acc: {:.4f} IoU: {:.4f} step/sec: {:.2f} | ETA {}"
-                .format(step, loss, acc, iou, speed,
-                        calculate_eta(all_step - step, speed)))
-            timer.restart()
+                "[EVAL]step: {} loss: {:.5f} acc: {:.4f} IoU: {:.4f} batch_cost: {:.4f}, reader_cost: {:.5f} | ETA {}"
+                .format(step, loss, acc, iou, batch_cost, reader_cost, eta))
+            batch_start = time.time()
             sys.stdout.flush()
-        except fluid.core.EOFException:
+        except paddle.fluid.core.EOFException:
             break
 
     category_iou, avg_iou = conf_mat.mean_iou()