1 | 1 | # Convert a .pt file to an .onnx file
2 | 2 | import torch
3 | 3 | from mnist import Net
  | 4 | +from onnxruntime.quantization import quantize_dynamic, QuantType, quant_pre_process
4 | 5 | from pathlib import Path
5 | 6 |
  | 7 | +MODEL_PATH = Path("./mnist_cnn.pt")
  | 8 | +INTERMEDIATE_OUTPUT_DIR = Path(".")
  | 9 | +WEB_OUTPUT_DIR = Path("../web/public")
  | 10 | +ONNX_OUTPUT = "mnist_cnn.onnx"
  | 11 | +ONNX_QUANT_PREPROCESS_OUTPUT = "mnist_cnn.infer.onnx"
  | 12 | +ONNX_QUANT_OUTPUT = "mnist_cnn.quant.onnx"
  | 13 | +
  | 14 | +
  | 15 | +def quantize_onnx_model():
  | 16 | +    # Dynamic quantization, following the onnxruntime example:
  | 17 | +    # https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/image_classification/cpu/ReadMe.md
  | 18 | +    quant_pre_process(
  | 19 | +        input_model=INTERMEDIATE_OUTPUT_DIR / ONNX_OUTPUT,
  | 20 | +        output_model_path=INTERMEDIATE_OUTPUT_DIR / ONNX_QUANT_PREPROCESS_OUTPUT,
  | 21 | +    )
  | 22 | +    quantize_dynamic(
  | 23 | +        model_input=INTERMEDIATE_OUTPUT_DIR / ONNX_QUANT_PREPROCESS_OUTPUT,
  | 24 | +        model_output=WEB_OUTPUT_DIR / ONNX_QUANT_OUTPUT,
  | 25 | +        weight_type=QuantType.QUInt8,  # QUInt8 avoids this bug: https://github.com/microsoft/onnxruntime/issues/15888#issuecomment-1856864610
  | 26 | +    )
  | 27 | +
6 | 28 |
7 | 29 | def main():
8 |    | -    MODEL_PATH = Path("./mnist_cnn.pt")
9 | 30 |     mnist_model = Net()
10 | 31 |     mnist_model.load_state_dict(torch.load(MODEL_PATH))
11 | 32 |     mnist_model.eval()
12 | 33 |     dummy_input = torch.zeros(1, 1, 28, 28)  # one 1x28x28 grayscale image, as in MNIST
13 | 34 |     torch.onnx.export(
14 |    | -        mnist_model, dummy_input, "../web/public/mnist_cnn.onnx", verbose=True
   | 35 | +        mnist_model, dummy_input, INTERMEDIATE_OUTPUT_DIR / ONNX_OUTPUT, verbose=True
15 | 36 |     )
   | 37 | +    quantize_onnx_model()
16 | 38 |
17 | 39 |
18 | 40 | if __name__ == "__main__":
19 | 41 |     main()
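Because `torch.onnx.export` traces the model through the dummy input, a parity check between the PyTorch model and the exported float ONNX file is a cheap way to catch export bugs before quantization. A sketch under the same assumptions (paths as defined above, `onnxruntime` and `numpy` available):

```python
# Compare PyTorch and float ONNX outputs on the same input (sketch, not in the commit).
import numpy as np
import onnxruntime as ort
import torch

from mnist import Net

model = Net()
model.load_state_dict(torch.load("./mnist_cnn.pt"))
model.eval()

dummy = torch.zeros(1, 1, 28, 28)
with torch.no_grad():
    torch_out = model(dummy).numpy()

session = ort.InferenceSession("./mnist_cnn.onnx")
(onnx_out,) = session.run(None, {session.get_inputs()[0].name: dummy.numpy()})

# Allow for small numerical drift between the two runtimes.
print(np.allclose(torch_out, onnx_out, atol=1e-5))
```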