software-mansion · JakubGonera · Feb 27, 2025 · Mar 4, 2025 · Mar 4, 2025 · Mar 5, 2025
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -1,6 +1,8 @@
 #import "ImageSegmentationModel.h"
 #import <unordered_set>
-#import "../../utils/ImageProcessor.h"
+#import <algorithm>
+#import <vector>
+#import "../../utils/ImageProcessor.h"
 #import "../../utils/Numerical.h"
 #import "../../utils/Conversions.h"
 #import "opencv2/opencv.hpp"
@@ -57,7 +59,7 @@ - (NSArray *)preprocess:(cv::Mat &)input {
   return resizedLabelScores;
 }
 
-void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& maxArg,
+void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& argMax,
                 cv::Size originalSize, std::size_t numLabels) {
   std::size_t numOriginalPixels = originalSize.height * originalSize.width;
   for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) {
@@ -71,17 +73,12 @@ void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& maxArg,
 
     std::vector<double> adjustedScores = softmax(scores);
 
-    std::size_t maxArgIndex = 0;
-    double maxArgVal = 0;
     for (std::size_t label = 0; label < numLabels; ++label) {
       labelScores[label].at<double>(row, col) = adjustedScores[label];
-      if (adjustedScores[label] > maxArgVal) {
-        maxArgIndex = label;
-        maxArgVal = adjustedScores[label];
-      }
     }
 
-    maxArg.at<int>(row, col) = maxArgIndex;
+    auto maxIt = std::max_element(scores.begin(), scores.end());
+    argMax.at<int>(row, col) = std::distance(scores.begin(), maxIt);
   }
 }
 
@@ -98,10 +95,10 @@ - (NSDictionary *)postprocess:(NSArray *)output
   std::vector<cv::Mat> resizedLabelScores = 
         rescaleResults(output, numLabels, modelImageSize, originalSize);
 
-  cv::Mat maxArg = cv::Mat(originalSize, CV_32S);
+  cv::Mat argMax = cv::Mat(originalSize, CV_32S);
 
-  // For each pixel apply softmax across all the labels and calculate the maxArg
-  adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels);
+  // For each pixel apply softmax across all the labels and calculate the argMax
+  adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels);
 
   std::unordered_set<std::string> labelSet;
 
@@ -120,7 +117,7 @@ - (NSDictionary *)postprocess:(NSArray *)output
     }
   }
 
-  result[@"argmax"] = simpleMatToNSArray<int>(maxArg);
+  result[@"argmax"] = simpleMatToNSArray<int>(argMax);
 
   return result;
 }

diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -2,6 +2,7 @@ import { useState } from 'react';
 import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
 import { ETError, getError } from '../../Error';
 import { useModule } from '../useModule';
+import { DeeplabLabel } from '../../types/image_segmentation';
 
 interface Props {
   modelSource: string | number;
@@ -16,8 +17,8 @@ export const useImageSegmentation = ({
   downloadProgress: number;
   forward: (
     input: string,
-    classesOfInterest?: string[]
-  ) => Promise<{ [category: string]: number[] }>;
+    classesOfInterest?: DeeplabLabel[]
+  ) => Promise<{ [key in DeeplabLabel]?: number[] }>;
 } => {
   const [module, _] = useState(() => new _ImageSegmentationModule());
   const [isGenerating, setIsGenerating] = useState(false);
@@ -26,7 +27,7 @@ export const useImageSegmentation = ({
     module,
   });
 
-  const forward = async (input: string, classesOfInterest?: string[]) => {
+  const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => {
     if (!isReady) {
       throw new Error(getError(ETError.ModuleNotLoaded));
     }
@@ -36,8 +37,20 @@ export const useImageSegmentation = ({
 
     try {
       setIsGenerating(true);
-      const output = await module.forward(input, classesOfInterest || []);
-      return output;
+      const stringDict = await module.forward(
+        input,
+        (classesOfInterest || []).map((label) => DeeplabLabel[label])
+      );
+
+      let enumDict: { [key in DeeplabLabel]?: number[] } = {};
+
+      for (const key in stringDict) {
+        if (key in DeeplabLabel) {
+          const enumKey = DeeplabLabel[key as keyof typeof DeeplabLabel];
+          enumDict[enumKey] = stringDict[key];
+        }
+      }
+      return enumDict;
     } catch (e) {
       throw new Error(getError(e));
     } finally {

diff --git a/src/index.tsx b/src/index.tsx
@@ -28,6 +28,7 @@ export * from './utils/listDownloadedResources';
 // types
 export * from './types/object_detection';
 export * from './types/ocr';
+export * from './types/image_segmentation';
 
 // constants
 export * from './constants/modelUrls';
diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts
@@ -5,12 +5,11 @@ import { getError } from '../../Error';
 export class ImageSegmentationModule extends BaseModule {
   static module = new _ImageSegmentationModule();
 
-  static async forward(input: string, classesOfInteres?: string[]) {
+  static async forward(input: string, classesOfInterest: string[]) {
     try {
-      return await (this.module.forward(
-        input,
-        classesOfInteres || []
-      ) as ReturnType<_ImageSegmentationModule['forward']>);
+      return await (this.module.forward(input, classesOfInterest) as ReturnType<
+        _ImageSegmentationModule['forward']
+      >);
     } catch (e) {
       throw new Error(getError(e));
     }

diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts
@@ -0,0 +1,24 @@
+export enum DeeplabLabel { // Numeric enum: members take implicit values 0..21 in declaration order
+  background, // 0 -- NOTE(review): order presumably mirrors the model's class indices; confirm
+  aeroplane, // 1
+  bicycle, // 2
+  bird, // 3
+  boat, // 4
+  bottle, // 5
+  bus, // 6
+  car, // 7
+  cat, // 8
+  chair, // 9
+  cow, // 10
+  diningtable, // 11
+  dog, // 12
+  horse, // 13
+  motorbike, // 14
+  person, // 15
+  pottedplant, // 16
+  sheep, // 17
+  sofa, // 18
+  train, // 19
+  tvmonitor, // 20
+  argmax, // 21 -- Additional label not present in the model; the iOS postprocess stores its per-pixel argmax map under the "argmax" key
+}