lucasjinreal
diff --git a/‎configs/coco-keypoints/yolox_kpts.yaml
+85 b/‎configs/coco-keypoints/yolox_kpts.yaml
+85
diff --git a/‎deploy/demo_quantized_int8.py
+161 b/‎deploy/demo_quantized_int8.py
+161
diff --git a/‎deploy/quant_atom/qt_ppq_sinst.py
+6-2 b/‎deploy/quant_atom/qt_ppq_sinst.py
+6-2
diff --git a/‎images/dog.jpg
160 KB b/‎images/dog.jpg
160 KB
diff --git a/‎readme.md
+13-11 b/‎readme.md
+13-11
@@ -0,0 +1,85 @@
+_BASE_: "../Base-YOLOv7.yaml"
+MODEL:
+  PIXEL_MEAN: [0.485, 0.456, 0.406] # same value as PP-YOLOv2, RGB order
+  PIXEL_STD: [0.229, 0.224, 0.225]
+
+  WEIGHTS: ""
+  MASK_ON: False
+  META_ARCHITECTURE: "YOLOX"
+  BACKBONE:
+    NAME: "build_cspdarknetx_backbone"
+
+  DARKNET:
+    WEIGHTS: ""
+    DEPTH_WISE: False
+    OUT_FEATURES: ["dark3", "dark4", "dark5"]
+
+  YOLO:
+    CLASSES: 80
+    IN_FEATURES: ["dark3", "dark4", "dark5"]
+    CONF_THRESHOLD: 0.001
+    NMS_THRESHOLD: 0.65
+    IGNORE_THRESHOLD: 0.7
+    WIDTH_MUL: 0.50
+    DEPTH_MUL: 0.33
+    LOSS_TYPE: "v7"
+    LOSS:
+      LAMBDA_IOU: 1.5
+
+DATASETS:
+  TRAIN: ("coco_2017_train",)
+  # TEST: ("coco_2014_val_mini",)
+  TEST: ("coco_2017_val",)
+
+INPUT:
+  # FORMAT: "RGB" # using BGR default
+  MIN_SIZE_TRAIN: (416, 512, 608, 768)
+  MAX_SIZE_TRAIN: 800 # force max size train to 800?
+  MIN_SIZE_TEST: 640
+  MAX_SIZE_TEST: 800
+  # augmentation switches (JITTER_CROP and RESIZE are disabled below)
+  JITTER_CROP:
+    ENABLED: False
+  RESIZE:
+    ENABLED: False
+    # SHAPE: (540, 960)
+  DISTORTION:
+    ENABLED: True
+  COLOR_JITTER:
+    BRIGHTNESS: True
+    SATURATION: True
+  # MOSAIC:
+  #   ENABLED: True
+  #   NUM_IMAGES: 4
+  #   DEBUG_VIS: True
+  #   # MOSAIC_WIDTH: 960
+  #   # MOSAIC_HEIGHT: 540
+  MOSAIC_AND_MIXUP:
+    ENABLED: True
+    # ENABLED: False
+    DEBUG_VIS: False
+    ENABLE_MIXUP: False
+    DISABLE_AT_ITER: 120000
+
+
+SOLVER:
+  # enable fp16 training
+  AMP:
+    ENABLED: true
+  IMS_PER_BATCH: 112
+  BASE_LR: 0.027
+  STEPS: (60000, 80000)
+  WARMUP_FACTOR: 0.00033333
+  WARMUP_ITERS: 1200
+  MAX_ITER: 230000
+  LR_SCHEDULER_NAME: "WarmupCosineLR"
+
+TEST:
+  EVAL_PERIOD: 10000
+  # EVAL_PERIOD: 0
+OUTPUT_DIR: "output/coco_yolox_s_kpts"
+VIS_PERIOD: 5000
+
+DATALOADER:
+  # proposals are part of the dataset_dicts, and take a lot of RAM
+  NUM_WORKERS: 3
@@ -0,0 +1,161 @@
+from torch import Tensor
+from wanwu.core.backends.trt import TensorRTInferencer
+import os
+import cv2
+import argparse
+import numpy as np
+import onnxruntime
+from alfred.vis.image.det import visualize_det_cv2_part
+from alfred.vis.image.mask import vis_bitmasks_with_classes
+from alfred.utils.file_io import ImageSourceIter
+
+
+def vis_res_fast(img, boxes, masks, scores, labels):
+    if masks is not None:
+        # masks shape, might not same as img, resize contours if so
+        img = vis_bitmasks_with_classes(
+            img,
+            labels,
+            masks,
+            force_colors=None,
+            draw_contours=True,
+            mask_border_color=[255, 255, 255],
+        )
+    thickness = 1 if masks is None else 2
+    font_scale = 0.3 if masks is None else 0.4
+    if boxes:
+        img = visualize_det_cv2_part(
+            img,
+            scores,
+            labels,
+            boxes,
+            line_thickness=thickness,
+            font_scale=font_scale,
+        )
+    return img
+
+
+def load_test_image(f, h, w):
+    a = cv2.imread(f)
+    a = cv2.resize(a, (w, h))
+    a_t = np.expand_dims(np.array(a).astype(np.float32), axis=0)
+    return a_t, a
+
+
+def preprocess_image(img, h, w):
+    a = cv2.resize(img, (w, h))
+    a_t = np.expand_dims(np.array(a).astype(np.float32), axis=0)
+    return a_t, img
+
+
+def make_parser():
+    parser = argparse.ArgumentParser("onnxruntime inference sample")
+    parser.add_argument(
+        "-m",
+        "--model",
+        type=str,
+        default="yolox.onnx",
+        help="Input your onnx model.",
+    )
+    parser.add_argument(
+        "-i",
+        "--image_path",
+        type=str,
+        default="test_image.png",
+        help="Path to your input image.",
+    )
+    parser.add_argument(
+        "-o",
+        "--output_dir",
+        type=str,
+        default="demo_output",
+        help="Path to your output directory.",
+    )
+    parser.add_argument(
+        "-s",
+        "--score_thr",
+        type=float,
+        default=0.3,
+        help="Score threshould to filter the result.",
+    )
+    parser.add_argument(
+        "-t",
+        "--type",
+        default='sparseinst',
+        help="model type.",
+    )
+    return parser
+
+
+if __name__ == "__main__":
+    args = make_parser().parse_args()
+    
+    engine_f = args.model
+    trt_model = TensorRTInferencer(engine_f)
+    input_shape = trt_model.ori_input_shape
+    print('input shape: ', input_shape)
+
+    iter = ImageSourceIter(args.image_path)
+    while True:
+        im = next(iter)
+        if isinstance(im, str):
+            im = cv2.imread(im)
+
+        inp, ori_img = preprocess_image(im, h=input_shape[0], w=input_shape[1])
+        output = trt_model.infer(inp)
+
+        print(output)
+
+        if "sparse" in args.type:
+            masks, scores, labels = None, None, None
+            for o in output:
+                if o.dtype == np.float32:
+                    scores = o
+                if o.dtype == np.int32 or o.dtype == np.int64:
+                    labels = o
+                if o.dtype == bool:
+                    masks = o
+            masks = masks[0]
+            print(masks.shape)
+            if len(masks.shape) > 3:
+                masks = np.squeeze(masks, axis=1)
+            scores = scores[0]
+            labels = labels[0]
+            # keep = scores > 0.15
+            keep = scores > 0.06
+            scores = scores[keep]
+            labels = labels[keep]
+            masks = masks[keep]
+            print(scores)
+            print(labels)
+            print(masks.shape)
+            img = vis_res_fast(im, None, masks, scores, labels)
+        else:
+            predictions = demo_postprocess(output[0], input_shape, p6=args.with_p6)[0]
+            boxes = predictions[:, :4]
+            scores = predictions[:, 4:5] * predictions[:, 5:]
+
+            boxes_xyxy = np.ones_like(boxes)
+            boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.0
+            boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.0
+            boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.0
+            boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.0
+            # boxes_xyxy /= ratio
+            dets = multiclass_nms(boxes_xyxy, scores, nms_thr=0.65, score_thr=0.1)
+            final_boxes, final_scores, final_cls_inds = (
+                dets[:, :4],
+                dets[:, 4],
+                dets[:, 5],
+            )
+            img = visualize_det_cv2_part(
+                ori_img, final_scores, final_cls_inds, final_boxes
+            )
+            cv2.imshow("aa", img)
+            cv2.waitKey(0)
+
+        cv2.imshow("YOLOv7 SparseInst CPU int8", img)
+        if iter.video_mode:
+            if cv2.waitKey(1) & 0xFF == ord("q"):
+                break
+        else:
+            cv2.waitKey(0)
@@ -2,6 +2,8 @@
 
 Examples on how to quantize with PPQ
 
+I don't recommend using PPQ; it has a lot of bugs.
+
 """
 from typing import Iterable
 
@@ -80,14 +82,16 @@ def collate_fn(batch):
     INPUT_SHAPE = [640, 640, 3]
     DEVICE = "cuda"  
     PLATFORM = (
-        TargetPlatform.ORT_OOS_INT8
+        # TargetPlatform.ORT_OOS_INT8
+        TargetPlatform.TRT_INT8
     ) 
     EXECUTING_DEVICE = "cpu"  # 'cuda' or 'cpu'.
 
     # create a setting for quantizing your network with PPL CUDA.
     # quant_setting = QuantizationSettingFactory.pplcuda_setting()
     quant_setting = QuantizationSettingFactory.default_setting()
-    quant_setting.equalization = True  # use layerwise equalization algorithm.
+    # quant_setting.equalization = True  # use layerwise equalization algorithm.
+    quant_setting.equalization = False  # tensorrt false
     quant_setting.dispatcher = (
         "conservative"  # dispatch this network in conservertive way.
     )
 
@@ -94,6 +94,19 @@ Here are some tasks need to be claimed:
 - [ ] MobileVIT: https://github.com/apple/ml-cvnets/blob/main/cvnets/models/classification/mobilevit.py
 
 
+## 🆕 News!
+
+- ***2022.05.09***: Big new feature added! **We adopt YOLOX with a Keypoints Head!** The model is still training, but you can already check out the code;
+- ***2022.04.23***: We finished the int8 quantization on SparseInst! It works perfectly! Download the onnx and try it out yourself.
+- ***2022.04.15***: Now, we support the `SparseInst` onnx export!
+- ***2022.03.25***: New instance seg supported! 40 FPS @ 37 mAP!! Which is fast;
+- ***2021.09.16***: First transformer based DETR model added, will explore more DETR series models;
+- ***2021.08.02***: **YOLOX** arch added, you can train YOLOX as well in this repo;
+- ***2021.07.25***: We found **YOLOv7-Res2net50** beat res50 and darknet53 at same speed level! 5% AP boost on custom dataset;
+- ***2021.07.04***: Added YOLOF, so we have anchor-free support as well; YOLOF achieves a better trade-off between speed and accuracy;
+- ***2021.06.25***: this project first started.
+- more
+
 
 ## 💁‍♂️ Results
 
@@ -104,17 +117,6 @@ Here are some tasks need to be claimed:
 ![](https://s1.ax1x.com/2022/03/25/qN5zp6.png)  |  ![](https://s2.loli.net/2022/03/25/MBwq9YT7zC5Sd1A.png)
 
 
-## 🆕 News!
-
-- **2022.04.23**: We finished the int8 quantization on SparseInst! It works perfect! Download the onnx try it our by your self.
-- **2022.04.15**: Now, we support the `SparseInst` onnx expport!
-- **2022.03.25**: New instance seg supported! 40 FPS @ 37 mAP!! Which is fast;
-- **2021.09.16**: First transformer based DETR model added, will explore more DETR series models;
-- **2021.08.02**: **YOLOX** arch added, you can train YOLOX as well in this repo;
-- **2021.07.25**: We found **YOLOv7-Res2net50** beat res50 and darknet53 at same speed level! 5% AP boost on custom dataset;
-- **2021.07.04**: Added YOLOF and we can have a anchor free support as well, YOLOF achieves a better trade off on speed and accuracy;
-- **2021.06.25**: this project first started.
-- more
 
 
 ## 🧑‍🦯 Installation && Quick Start