From 547a71840ddb0bda2b688426e2f0eebf8e00728a Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Thu, 27 Feb 2025 11:19:23 +0100
Subject: [PATCH 1/9] Add scaffolding for image segmentation native code

---
 ios/RnExecutorch/ImageSegmentation.h          |  5 ++
 ios/RnExecutorch/ImageSegmentation.mm         | 59 +++++++++++++++++++
 ios/RnExecutorch/StyleTransfer.mm             |  2 +-
 .../ImageSegmentationModel.h                  |  5 ++
 .../ImageSegmentationModel.mm                 |  6 ++
 .../{ => style_transfer}/StyleTransferModel.h |  2 +-
 .../StyleTransferModel.mm                     |  2 +-
 .../computer_vision/useImageSegmentation.ts   | 31 ++++++++++
 src/index.tsx                                 |  2 +
 src/modules/BaseModule.ts                     |  2 +
 .../ImageSegmentationModule.ts                | 12 ++++
 src/native/NativeImageSegmentation.ts         | 10 ++++
 src/native/RnExecutorchModules.ts             | 29 +++++++++
 13 files changed, 164 insertions(+), 3 deletions(-)
 create mode 100644 ios/RnExecutorch/ImageSegmentation.h
 create mode 100644 ios/RnExecutorch/ImageSegmentation.mm
 create mode 100644 ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
 create mode 100644 ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
 rename ios/RnExecutorch/models/{ => style_transfer}/StyleTransferModel.h (90%)
 rename ios/RnExecutorch/models/{ => style_transfer}/StyleTransferModel.mm (97%)
 create mode 100644 src/hooks/computer_vision/useImageSegmentation.ts
 create mode 100644 src/modules/computer_vision/ImageSegmentationModule.ts
 create mode 100644 src/native/NativeImageSegmentation.ts

diff --git a/ios/RnExecutorch/ImageSegmentation.h b/ios/RnExecutorch/ImageSegmentation.h
new file mode 100644
index 00000000..59ed56a4
--- /dev/null
+++ b/ios/RnExecutorch/ImageSegmentation.h
@@ -0,0 +1,5 @@
+#import <RnExecutorchSpec/RnExecutorchSpec.h>
+
+@interface ImageSegmentation : NSObject <NativeImageSegmentationSpec>
+
+@end
\ No newline at end of file
diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm
new file mode 100644
index 00000000..ef526087
--- /dev/null
+++ b/ios/RnExecutorch/ImageSegmentation.mm
@@ -0,0 +1,59 @@
+#import "ImageSegmentation.h"
+#import "models/image_segmentation/ImageSegmentationModel.h"
+#import "models/BaseModel.h"
+#import "utils/ETError.h"
+#import <ExecutorchLib/ETModel.h>
+#import <React/RCTBridgeModule.h>
+
+@implementation ImageSegmentation {
+  ImageSegmentationModel *model;
+}
+
+RCT_EXPORT_MODULE()
+
+- (void)loadModule:(NSString *)modelSource
+           resolve:(RCTPromiseResolveBlock)resolve
+            reject:(RCTPromiseRejectBlock)reject {
+
+  NSLog(@"Segmentation: loadModule");
+  model = [[ImageSegmentationModel alloc] init];
+  [model
+       loadModel:[NSURL URLWithString:modelSource]
+      completion:^(BOOL success, NSNumber *errorCode) {
+        if (success) {
+          resolve(errorCode);
+          return;
+        }
+
+        reject(@"init_module_error",
+               [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]],
+               nil);
+        return;
+      }];
+}
+
+- (void)forward:(NSString *)input
+        resolve:(RCTPromiseResolveBlock)resolve
+         reject:(RCTPromiseRejectBlock)reject {
+    NSLog(@"Segmentation: forward");
+//   @try {
+//     cv::Mat image = [ImageProcessor readImage:input];
+//     cv::Mat resultImage = [model runModel:image];
+
+//     NSString *tempFilePath = [ImageProcessor saveToTempFile:resultImage];
+//     resolve(tempFilePath);
+//     return;
+//   } @catch (NSException *exception) {
+//     NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason);
+//     reject(@"forward_error",
+//            [NSString stringWithFormat:@"%@", exception.reason], nil);
+//     return;
+//   }
+}
+
+- (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
+    (const facebook::react::ObjCTurboModule::InitParams &)params {
+  return std::make_shared<facebook::react::NativeImageSegmentationSpecJSI>(params);
+}
+
+@end
diff --git a/ios/RnExecutorch/StyleTransfer.mm b/ios/RnExecutorch/StyleTransfer.mm
index 08e8d4a3..52930cd4 100644
--- a/ios/RnExecutorch/StyleTransfer.mm
+++ b/ios/RnExecutorch/StyleTransfer.mm
@@ -1,7 +1,7 @@
 #import "StyleTransfer.h"
 #import "ImageProcessor.h"
 #import "models/BaseModel.h"
-#import "models/StyleTransferModel.h"
+#import "models/style_transfer/StyleTransferModel.h"
 #import "utils/ETError.h"
 #import <ExecutorchLib/ETModel.h>
 #import <React/RCTBridgeModule.h>
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
new file mode 100644
index 00000000..0e02a94c
--- /dev/null
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
@@ -0,0 +1,5 @@
+#import "../BaseModel.h"
+
+@interface ImageSegmentationModel : BaseModel
+
+@end
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
new file mode 100644
index 00000000..8212bdee
--- /dev/null
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -0,0 +1,6 @@
+#import "ImageSegmentationModel.h"
+
+@implementation ImageSegmentationModel {
+}
+
+@end
diff --git a/ios/RnExecutorch/models/StyleTransferModel.h b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h
similarity index 90%
rename from ios/RnExecutorch/models/StyleTransferModel.h
rename to ios/RnExecutorch/models/style_transfer/StyleTransferModel.h
index 1fd91d7b..20cdf6dd 100644
--- a/ios/RnExecutorch/models/StyleTransferModel.h
+++ b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h
@@ -1,4 +1,4 @@
-#import "BaseModel.h"
+#import "../BaseModel.h"
 #import "opencv2/opencv.hpp"
 
 @interface StyleTransferModel : BaseModel
diff --git a/ios/RnExecutorch/models/StyleTransferModel.mm b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm
similarity index 97%
rename from ios/RnExecutorch/models/StyleTransferModel.mm
rename to ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm
index 6051e24b..6a351431 100644
--- a/ios/RnExecutorch/models/StyleTransferModel.mm
+++ b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm
@@ -1,5 +1,5 @@
 #import "StyleTransferModel.h"
-#import "../utils/ImageProcessor.h"
+#import "../../utils/ImageProcessor.h"
 #import "opencv2/opencv.hpp"
 
 @implementation StyleTransferModel {
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
new file mode 100644
index 00000000..2134b624
--- /dev/null
+++ b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -0,0 +1,31 @@
+import { useState } from 'react';
+import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
+import { useModule } from '../useModule';
+
+interface Props {
+  modelSource: string | number;
+}
+
+export const useImageSegmentation = ({
+  modelSource,
+}: Props): {
+  error: string | null;
+  isReady: boolean;
+  isGenerating: boolean;
+  downloadProgress: number;
+  forward: (input: string) => Promise<string>;
+} => {
+  const [module, _] = useState(() => new _ImageSegmentationModule());
+  const {
+    error,
+    isReady,
+    isGenerating,
+    downloadProgress,
+    forwardImage: forward,
+  } = useModule({
+    modelSource,
+    module,
+  });
+
+  return { error, isReady, isGenerating, downloadProgress, forward };
+};
diff --git a/src/index.tsx b/src/index.tsx
index 7ae7a7ad..8d322696 100644
--- a/src/index.tsx
+++ b/src/index.tsx
@@ -2,6 +2,7 @@
 export * from './hooks/computer_vision/useClassification';
 export * from './hooks/computer_vision/useObjectDetection';
 export * from './hooks/computer_vision/useStyleTransfer';
+export * from './hooks/computer_vision/useImageSegmentation';
 export * from './hooks/computer_vision/useOCR';
 export * from './hooks/computer_vision/useVerticalOCR';
 
@@ -14,6 +15,7 @@ export * from './hooks/general/useExecutorchModule';
 export * from './modules/computer_vision/ClassificationModule';
 export * from './modules/computer_vision/ObjectDetectionModule';
 export * from './modules/computer_vision/StyleTransferModule';
+export * from './modules/computer_vision/ImageSegmentationModule';
 export * from './modules/computer_vision/OCRModule';
 export * from './modules/computer_vision/VerticalOCRModule';
 
diff --git a/src/modules/BaseModule.ts b/src/modules/BaseModule.ts
index e977836f..56cf2e3d 100644
--- a/src/modules/BaseModule.ts
+++ b/src/modules/BaseModule.ts
@@ -1,4 +1,5 @@
 import {
+  _ImageSegmentationModule,
   _StyleTransferModule,
   _ObjectDetectionModule,
   _ClassificationModule,
@@ -10,6 +11,7 @@ import { getError } from '../Error';
 
 export class BaseModule {
   static module:
+    | _ImageSegmentationModule
     | _StyleTransferModule
     | _ObjectDetectionModule
     | _ClassificationModule
diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts
new file mode 100644
index 00000000..eae124a8
--- /dev/null
+++ b/src/modules/computer_vision/ImageSegmentationModule.ts
@@ -0,0 +1,12 @@
+import { BaseCVModule } from './BaseCVModule';
+import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
+
+export class ImageSegmentationModule extends BaseCVModule {
+  static module = new _ImageSegmentationModule();
+
+  static async forward(input: string) {
+    return await (super.forward(input) as ReturnType<
+      _ImageSegmentationModule['forward']
+    >);
+  }
+}
diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts
new file mode 100644
index 00000000..1dcc9c27
--- /dev/null
+++ b/src/native/NativeImageSegmentation.ts
@@ -0,0 +1,10 @@
+import type { TurboModule } from 'react-native';
+import { TurboModuleRegistry } from 'react-native';
+
+export interface Spec extends TurboModule {
+  loadModule(modelSource: string): Promise<number>;
+
+  forward(input: string): Promise<string>;
+}
+
+export default TurboModuleRegistry.get<Spec>('ImageSegmentation');
diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts
index b1edcf52..c48c08a3 100644
--- a/src/native/RnExecutorchModules.ts
+++ b/src/native/RnExecutorchModules.ts
@@ -2,6 +2,7 @@ import { Platform } from 'react-native';
 import { Spec as ClassificationInterface } from './NativeClassification';
 import { Spec as ObjectDetectionInterface } from './NativeObjectDetection';
 import { Spec as StyleTransferInterface } from './NativeStyleTransfer';
+import { Spec as ImageSegmentationInterface } from './NativeImageSegmentation';
 import { Spec as ETModuleInterface } from './NativeETModule';
 import { Spec as OCRInterface } from './NativeOCR';
 import { Spec as VerticalOCRInterface } from './NativeVerticalOCR';
@@ -51,6 +52,19 @@ const Classification = ClassificationSpec
       }
     );
 
+const ImageSegmentationSpec = require('./NativeImageSegmentation').default;
+
+const ImageSegmentation = ImageSegmentationSpec
+  ? ImageSegmentationSpec
+  : new Proxy(
+      {},
+      {
+        get() {
+          throw new Error(LINKING_ERROR);
+        },
+      }
+    );
+
 const ObjectDetectionSpec = require('./NativeObjectDetection').default;
 
 const ObjectDetection = ObjectDetectionSpec
@@ -116,6 +130,19 @@ const VerticalOCR = VerticalOCRSpec
       }
     );
 
+class _ImageSegmentationModule {
+  async forward(
+    input: string
+  ): ReturnType<ImageSegmentationInterface['forward']> {
+    return await ImageSegmentation.forward(input);
+  }
+  async loadModule(
+    modelSource: string | number
+  ): ReturnType<ImageSegmentationInterface['loadModule']> {
+    return await ImageSegmentation.loadModule(modelSource);
+  }
+}
+
 class _ObjectDetectionModule {
   async forward(
     input: string
@@ -239,12 +266,14 @@ export {
   Classification,
   ObjectDetection,
   StyleTransfer,
+  ImageSegmentation,
   SpeechToText,
   OCR,
   VerticalOCR,
   _ETModule,
   _ClassificationModule,
   _StyleTransferModule,
+  _ImageSegmentationModule,
   _ObjectDetectionModule,
   _SpeechToTextModule,
   _OCRModule,

From cda498682e415560c3a52d001108b12275b4c75f Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Tue, 4 Mar 2025 11:12:55 +0100
Subject: [PATCH 2/9] Add working ios-native model

---
 ios/RnExecutorch/ImageSegmentation.mm         |  30 +++---
 .../classification/ClassificationModel.mm     |   2 +-
 .../models/image_segmentation/Constants.h     |   5 +
 .../models/image_segmentation/Constants.mm    |  10 ++
 .../ImageSegmentationModel.h                  |   3 +
 .../ImageSegmentationModel.mm                 | 102 ++++++++++++++++++
 .../Utils.h => utils/Numerical.h}             |   0
 .../Utils.mm => utils/Numerical.mm}           |   0
 .../computer_vision/useImageSegmentation.ts   |   2 +-
 src/modules/computer_vision/BaseCVModule.ts   |   2 +
 src/native/NativeImageSegmentation.ts         |   2 +-
 11 files changed, 140 insertions(+), 18 deletions(-)
 create mode 100644 ios/RnExecutorch/models/image_segmentation/Constants.h
 create mode 100644 ios/RnExecutorch/models/image_segmentation/Constants.mm
 rename ios/RnExecutorch/{models/classification/Utils.h => utils/Numerical.h} (100%)
 rename ios/RnExecutorch/{models/classification/Utils.mm => utils/Numerical.mm} (100%)

diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm
index ef526087..df972ab2 100644
--- a/ios/RnExecutorch/ImageSegmentation.mm
+++ b/ios/RnExecutorch/ImageSegmentation.mm
@@ -4,6 +4,8 @@
 #import "utils/ETError.h"
 #import <ExecutorchLib/ETModel.h>
 #import <React/RCTBridgeModule.h>
+#import <opencv2/opencv.hpp>
+#import "ImageProcessor.h"
 
 @implementation ImageSegmentation {
   ImageSegmentationModel *model;
@@ -15,7 +17,6 @@ - (void)loadModule:(NSString *)modelSource
            resolve:(RCTPromiseResolveBlock)resolve
             reject:(RCTPromiseRejectBlock)reject {
 
-  NSLog(@"Segmentation: loadModule");
   model = [[ImageSegmentationModel alloc] init];
   [model
        loadModel:[NSURL URLWithString:modelSource]
@@ -35,20 +36,19 @@ - (void)loadModule:(NSString *)modelSource
 - (void)forward:(NSString *)input
         resolve:(RCTPromiseResolveBlock)resolve
          reject:(RCTPromiseRejectBlock)reject {
-    NSLog(@"Segmentation: forward");
-//   @try {
-//     cv::Mat image = [ImageProcessor readImage:input];
-//     cv::Mat resultImage = [model runModel:image];
-
-//     NSString *tempFilePath = [ImageProcessor saveToTempFile:resultImage];
-//     resolve(tempFilePath);
-//     return;
-//   } @catch (NSException *exception) {
-//     NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason);
-//     reject(@"forward_error",
-//            [NSString stringWithFormat:@"%@", exception.reason], nil);
-//     return;
-//   }
+
+  @try {
+    cv::Mat image = [ImageProcessor readImage:input];
+    NSDictionary *result= [model runModel:image];
+
+    resolve(result);
+    return;
+  } @catch (NSException *exception) {
+    NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason);
+    reject(@"forward_error",
+           [NSString stringWithFormat:@"%@", exception.reason], nil);
+    return;
+  }
 }
 
 - (std::shared_ptr<facebook::react::TurboModule>)getTurboModule:
diff --git a/ios/RnExecutorch/models/classification/ClassificationModel.mm b/ios/RnExecutorch/models/classification/ClassificationModel.mm
index 8e7973e2..0306e67c 100644
--- a/ios/RnExecutorch/models/classification/ClassificationModel.mm
+++ b/ios/RnExecutorch/models/classification/ClassificationModel.mm
@@ -1,7 +1,7 @@
 #import "ClassificationModel.h"
 #import "../../utils/ImageProcessor.h"
+#import "../../utils/Numerical.h"
 #import "Constants.h"
-#import "Utils.h"
 #import "opencv2/opencv.hpp"
 
 @implementation ClassificationModel
diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.h b/ios/RnExecutorch/models/image_segmentation/Constants.h
new file mode 100644
index 00000000..889556d7
--- /dev/null
+++ b/ios/RnExecutorch/models/image_segmentation/Constants.h
@@ -0,0 +1,5 @@
+#import <string>
+#import <vector>
+
+
+extern const std::vector<std::string> deeplabv3_resnet50_labels;
diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.mm b/ios/RnExecutorch/models/image_segmentation/Constants.mm
new file mode 100644
index 00000000..4d98f34d
--- /dev/null
+++ b/ios/RnExecutorch/models/image_segmentation/Constants.mm
@@ -0,0 +1,10 @@
+#import "Constants.h"
+#import <string>
+#import <vector>
+
+const std::vector<std::string> deeplabv3_resnet50_labels = {
+    "background", "aeroplane", "bicycle", "bird", "boat",
+    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
+    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
+    "sofa", "train", "tvmonitor"
+};
\ No newline at end of file
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
index 0e02a94c..66dfb4b6 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
@@ -1,5 +1,8 @@
 #import "../BaseModel.h"
+#import "opencv2/opencv.hpp"
 
 @interface ImageSegmentationModel : BaseModel
+- (cv::Size)getModelImageSize;
+- (NSDictionary *)runModel:(cv::Mat &)input;
 
 @end
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index 8212bdee..f504cf12 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -1,6 +1,108 @@
 #import "ImageSegmentationModel.h"
+#import "../../utils/ImageProcessor.h"
+#import "../../utils/Numerical.h"
+#import "opencv2/opencv.hpp"
+#import "Constants.h"
+
+@interface ImageSegmentationModel ()
+  - (NSArray *)preprocess:(cv::Mat &)input;
+  - (NSDictionary *)postprocess:(NSArray *)output;
+@end
 
 @implementation ImageSegmentationModel {
+  cv::Size originalSize;
+}
+
+// Returns the model input size, read from the last two input-shape dims.
+- (cv::Size)getModelImageSize {
+  NSArray *inputShape = [module getInputShape:@0];
+  // Shape is read as (..., height, width): width last, height before it.
+  int width = [inputShape.lastObject intValue];
+  int height = [inputShape[inputShape.count - 2] intValue];
+
+  // cv::Size is (width, height); swapping them transposes non-square inputs.
+  return cv::Size(width, height);
+}
+
+// Resizes `input` to the model's input size and converts it to an NSArray.
+- (NSArray *)preprocess:(cv::Mat &)input {
+  // Remembered so postprocess can scale the scores back to this size.
+  originalSize = cv::Size(input.cols, input.rows);
+
+  cv::Mat resized;
+  cv::resize(input, resized, [self getModelImageSize]);
+  NSArray *modelInput = [ImageProcessor matToNSArray:resized];
+  return modelInput;
+}
+
+- (NSDictionary *)postprocess:(NSArray *)output {
+  cv::Size modelImageSize = [self getModelImageSize];
+
+  std::size_t numLabels = deeplabv3_resnet50_labels.size();
+  std::size_t numModelPixels = modelImageSize.height * modelImageSize.width;
+  std::size_t numOriginalPixels = originalSize.height * originalSize.width;
+  std::size_t outputSize = (std::size_t)output.count;
+
+  NSAssert(outputSize ==  numLabels * numModelPixels, 
+        @"Model generated unexpected output size.");
+
+
+  // For each label extract it's matrix and rescale it to the original size
+  std::vector<cv::Mat> resizedLabelScores(numLabels);
+  for (std::size_t label = 0; label < numLabels; ++label) {
+    cv::Mat labelMat = cv::Mat(modelImageSize, CV_64F);
+
+    for(std::size_t pixel = 0; pixel < numModelPixels; ++pixel){
+      int row = pixel / modelImageSize.width;
+      int col = pixel % modelImageSize.width;
+      labelMat.at<double>(row, col) = [output[label * numModelPixels + pixel] doubleValue];
+    }
+
+    cv::resize(labelMat, resizedLabelScores[label], originalSize);
+  }
+
+  // For each pixel apply softmax across all the labels
+  for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) {
+    int row = pixel / originalSize.width;
+    int col = pixel % originalSize.width;
+    std::vector<double> scores;
+    scores.reserve(numLabels);
+    for (const cv::Mat& mat : resizedLabelScores) {
+      scores.push_back(mat.at<double>(row, col));
+    }
+
+    std::vector<double> adjustedScores = softmax(scores);
+
+    for (std::size_t label = 0; label < numLabels; ++label) {
+      resizedLabelScores[label].at<double>(row, col) = adjustedScores[label];
+    }
+  }
+
+  NSMutableDictionary *result = [NSMutableDictionary dictionary];
+  
+  for (std::size_t label = 0; label < numLabels; ++label) {
+    NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str());
+    NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numOriginalPixels];
+
+    for (std::size_t x = 0; x < originalSize.height; ++x) {
+        for (std::size_t y = 0; y < originalSize.width; ++y) {
+            arr[x * originalSize.width + y] = @(resizedLabelScores[label].at<double>(x, y));
+        }
+    }
+
+    result[labelString] = arr;
+  }
+
+  return result;
+}
+
+- (NSDictionary *)runModel:(cv::Mat &)input {
+  NSArray *modelInput = [self preprocess:input];
+  NSArray *result = [self forward:modelInput];
+
+  NSDictionary *output = [self postprocess:result[0]];
+
+  return output;
 }
 
 @end
diff --git a/ios/RnExecutorch/models/classification/Utils.h b/ios/RnExecutorch/utils/Numerical.h
similarity index 100%
rename from ios/RnExecutorch/models/classification/Utils.h
rename to ios/RnExecutorch/utils/Numerical.h
diff --git a/ios/RnExecutorch/models/classification/Utils.mm b/ios/RnExecutorch/utils/Numerical.mm
similarity index 100%
rename from ios/RnExecutorch/models/classification/Utils.mm
rename to ios/RnExecutorch/utils/Numerical.mm
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
index 2134b624..746a45e0 100644
--- a/src/hooks/computer_vision/useImageSegmentation.ts
+++ b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -13,7 +13,7 @@ export const useImageSegmentation = ({
   isReady: boolean;
   isGenerating: boolean;
   downloadProgress: number;
-  forward: (input: string) => Promise<string>;
+  forward: (input: string) => Promise<{ [category: string]: number[] }>;
 } => {
   const [module, _] = useState(() => new _ImageSegmentationModule());
   const {
diff --git a/src/modules/computer_vision/BaseCVModule.ts b/src/modules/computer_vision/BaseCVModule.ts
index c61987d3..2702a6f9 100644
--- a/src/modules/computer_vision/BaseCVModule.ts
+++ b/src/modules/computer_vision/BaseCVModule.ts
@@ -3,6 +3,7 @@ import {
   _StyleTransferModule,
   _ObjectDetectionModule,
   _ClassificationModule,
+  _ImageSegmentationModule,
 } from '../../native/RnExecutorchModules';
 import { getError } from '../../Error';
 
@@ -10,6 +11,7 @@ export class BaseCVModule extends BaseModule {
   static module:
     | _StyleTransferModule
     | _ObjectDetectionModule
+    | _ImageSegmentationModule
     | _ClassificationModule;
 
   static async forward(input: string) {
diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts
index 1dcc9c27..c65b4cb7 100644
--- a/src/native/NativeImageSegmentation.ts
+++ b/src/native/NativeImageSegmentation.ts
@@ -4,7 +4,7 @@ import { TurboModuleRegistry } from 'react-native';
 export interface Spec extends TurboModule {
   loadModule(modelSource: string): Promise<number>;
 
-  forward(input: string): Promise<string>;
+  forward(input: string): Promise<{ [category: string]: number[] }>;
 }
 
 export default TurboModuleRegistry.get<Spec>('ImageSegmentation');

From c51b89638bf66602fc0f652e9de0dce6fcb01f1f Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Tue, 4 Mar 2025 13:21:43 +0100
Subject: [PATCH 3/9] Add arg max map to the segmentation result

---
 .../ImageSegmentationModel.h                  | 13 +++++++++++
 .../ImageSegmentationModel.mm                 | 23 +++++++++++--------
 .../image_segmentation/image_segmentation.ts  | 23 +++++++++++++++++++
 3 files changed, 50 insertions(+), 9 deletions(-)
 create mode 100644 src/constants/image_segmentation/image_segmentation.ts

diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
index 66dfb4b6..95535ba0 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
@@ -6,3 +6,16 @@
 - (NSDictionary *)runModel:(cv::Mat &)input;
 
 @end
+
+// Copies a 2-D cv::Mat of element type T into a row-major NSArray of boxed values.
+template <typename T>
+NSArray* matToNSArray(const cv::Mat& mat) {
+    NSMutableArray *values = [[NSMutableArray alloc] initWithCapacity:mat.rows * mat.cols];
+    // int indices match cv::Mat's signed rows/cols and the at<T>(int, int) signature.
+    for (int row = 0; row < mat.rows; ++row) {
+        for (int col = 0; col < mat.cols; ++col) {
+            // Row-major traversal, so each append lands at row * cols + col.
+            [values addObject:@(mat.at<T>(row, col))];
+        }
+    }
+    return values;
+}
\ No newline at end of file
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index f504cf12..52707a4e 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -46,7 +46,6 @@ - (NSDictionary *)postprocess:(NSArray *)output {
   NSAssert(outputSize ==  numLabels * numModelPixels, 
         @"Model generated unexpected output size.");
 
-
   // For each label extract it's matrix and rescale it to the original size
   std::vector<cv::Mat> resizedLabelScores(numLabels);
   for (std::size_t label = 0; label < numLabels; ++label) {
@@ -61,6 +60,8 @@ - (NSDictionary *)postprocess:(NSArray *)output {
     cv::resize(labelMat, resizedLabelScores[label], originalSize);
   }
 
+  cv::Mat maxArg = cv::Mat(originalSize, CV_32S);
+
   // For each pixel apply softmax across all the labels
   for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) {
     int row = pixel / originalSize.width;
@@ -73,26 +74,30 @@ - (NSDictionary *)postprocess:(NSArray *)output {
 
     std::vector<double> adjustedScores = softmax(scores);
 
+    std::size_t maxArgIndex = 0;
+    double maxArgVal = 0;
     for (std::size_t label = 0; label < numLabels; ++label) {
       resizedLabelScores[label].at<double>(row, col) = adjustedScores[label];
+      if (adjustedScores[label] > maxArgVal) {
+        maxArgIndex = label;
+        maxArgVal = adjustedScores[label];
+      }
     }
+
+    maxArg.at<int>(row, col) = maxArgIndex;
   }
 
   NSMutableDictionary *result = [NSMutableDictionary dictionary];
   
+  // Convert to NSArray and populate the final dictionary
   for (std::size_t label = 0; label < numLabels; ++label) {
     NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str());
-    NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numOriginalPixels];
-
-    for (std::size_t x = 0; x < originalSize.height; ++x) {
-        for (std::size_t y = 0; y < originalSize.width; ++y) {
-            arr[x * originalSize.width + y] = @(resizedLabelScores[label].at<double>(x, y));
-        }
-    }
-
+    NSMutableArray *arr = matToNSArray<double>(resizedLabelScores[label]);
     result[labelString] = arr;
   }
 
+  result[@"argmax"] = matToNSArray<int>(maxArg);
+
   return result;
 }
 
diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts
new file mode 100644
index 00000000..b2f15f44
--- /dev/null
+++ b/src/constants/image_segmentation/image_segmentation.ts
@@ -0,0 +1,23 @@
+export const classLabels = new Map<number, string>([
+  [0, 'background'],
+  [1, 'aeroplane'],
+  [2, 'bicycle'],
+  [3, 'bird'],
+  [4, 'boat'],
+  [5, 'bottle'],
+  [6, 'bus'],
+  [7, 'car'],
+  [8, 'cat'],
+  [9, 'chair'],
+  [10, 'cow'],
+  [11, 'diningtable'],
+  [12, 'dog'],
+  [13, 'horse'],
+  [14, 'motorbike'],
+  [15, 'person'],
+  [16, 'pottedplant'],
+  [17, 'sheep'],
+  [18, 'sofa'],
+  [19, 'train'],
+  [20, 'tvmonitor'],
+]);

From 3d04da27351086de39ff520be91181794222eaf6 Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Wed, 5 Mar 2025 12:14:27 +0100
Subject: [PATCH 4/9] Add a way to filter what segmentation classes are
 returned

---
 ios/RnExecutorch/ImageSegmentation.mm         |  3 +-
 .../ImageSegmentationModel.h                  |  3 +-
 .../ImageSegmentationModel.mm                 | 28 ++++++---
 .../computer_vision/useImageSegmentation.ts   | 60 +++++++++++++++----
 src/modules/computer_vision/BaseCVModule.ts   |  2 -
 .../ImageSegmentationModule.ts                | 19 ++++--
 src/native/NativeImageSegmentation.ts         |  5 +-
 src/native/RnExecutorchModules.ts             |  5 +-
 8 files changed, 91 insertions(+), 34 deletions(-)

diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm
index df972ab2..cf9366d5 100644
--- a/ios/RnExecutorch/ImageSegmentation.mm
+++ b/ios/RnExecutorch/ImageSegmentation.mm
@@ -34,12 +34,13 @@ - (void)loadModule:(NSString *)modelSource
 }
 
 - (void)forward:(NSString *)input
+        classesOfInterest:(NSArray *)classesOfInterest
         resolve:(RCTPromiseResolveBlock)resolve
          reject:(RCTPromiseRejectBlock)reject {
 
   @try {
     cv::Mat image = [ImageProcessor readImage:input];
-    NSDictionary *result= [model runModel:image];
+    NSDictionary *result = [model runModel:image returnClasses:classesOfInterest];
 
     resolve(result);
     return;
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
index 95535ba0..91d1afc0 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
@@ -3,7 +3,8 @@
 
 @interface ImageSegmentationModel : BaseModel
 - (cv::Size)getModelImageSize;
-- (NSDictionary *)runModel:(cv::Mat &)input;
+- (NSDictionary *)runModel:(cv::Mat &)input
+                  returnClasses:(NSArray *)classesOfInterest;
 
 @end
 
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index 52707a4e..9c8959b2 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -1,4 +1,5 @@
 #import "ImageSegmentationModel.h"
+#import <unordered_set>
 #import "../../utils/ImageProcessor.h"
 #import "../../utils/Numerical.h"
 #import "opencv2/opencv.hpp"
@@ -6,7 +7,8 @@
 
 @interface ImageSegmentationModel ()
   - (NSArray *)preprocess:(cv::Mat &)input;
-  - (NSDictionary *)postprocess:(NSArray *)output;
+  - (NSDictionary *)postprocess:(NSArray *)output
+                    returnClasses:(NSArray *)classesOfInterest;
 @end
 
 @implementation ImageSegmentationModel {
@@ -35,7 +37,8 @@ - (NSArray *)preprocess:(cv::Mat &)input {
   return modelInput;
 }
 
-- (NSDictionary *)postprocess:(NSArray *)output {
+- (NSDictionary *)postprocess:(NSArray *)output
+                  returnClasses:(NSArray *)classesOfInterest{
   cv::Size modelImageSize = [self getModelImageSize];
 
   std::size_t numLabels = deeplabv3_resnet50_labels.size();
@@ -87,13 +90,21 @@ - (NSDictionary *)postprocess:(NSArray *)output {
     maxArg.at<int>(row, col) = maxArgIndex;
   }
 
+  std::unordered_set<std::string> labelSet;
+
+  for (id label in classesOfInterest) {
+      labelSet.insert(std::string([label UTF8String]));
+  }
+
   NSMutableDictionary *result = [NSMutableDictionary dictionary];
-  
+
   // Convert to NSArray and populate the final dictionary
   for (std::size_t label = 0; label < numLabels; ++label) {
-    NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str());
-    NSMutableArray *arr = matToNSArray<double>(resizedLabelScores[label]);
-    result[labelString] = arr;
+    if (labelSet.contains(deeplabv3_resnet50_labels[label])){
+        NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str());
+        NSArray *arr = matToNSArray<double>(resizedLabelScores[label]);
+        result[labelString] = arr;
+    }
   }
 
   result[@"argmax"] = matToNSArray<int>(maxArg);
@@ -101,11 +112,12 @@ - (NSDictionary *)postprocess:(NSArray *)output {
   return result;
 }
 
-- (NSDictionary *)runModel:(cv::Mat &)input {
+- (NSDictionary *)runModel:(cv::Mat &)input
+                  returnClasses:(NSArray *)classesOfInterest {
   NSArray *modelInput = [self preprocess:input];
   NSArray *result = [self forward:modelInput];
 
-  NSDictionary *output = [self postprocess:result[0]];
+  NSDictionary *output = [self postprocess:result[0] returnClasses:classesOfInterest];
 
   return output;
 }
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
index 746a45e0..5b583e5e 100644
--- a/src/hooks/computer_vision/useImageSegmentation.ts
+++ b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -1,6 +1,7 @@
-import { useState } from 'react';
+import { useState, useEffect } from 'react';
 import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
-import { useModule } from '../useModule';
+import { fetchResource } from '../../utils/fetchResource';
+import { ETError, getError } from '../../Error';
 
 interface Props {
   modelSource: string | number;
@@ -13,19 +14,52 @@ export const useImageSegmentation = ({
   isReady: boolean;
   isGenerating: boolean;
   downloadProgress: number;
-  forward: (input: string) => Promise<{ [category: string]: number[] }>;
+  forward: (
+    input: string,
+    classesOfInterest?: string[]
+  ) => Promise<{ [category: string]: number[] }>;
 } => {
   const [module, _] = useState(() => new _ImageSegmentationModule());
-  const {
-    error,
-    isReady,
-    isGenerating,
-    downloadProgress,
-    forwardImage: forward,
-  } = useModule({
-    modelSource,
-    module,
-  });
+  const [error, setError] = useState<null | string>(null);
+  const [isReady, setIsReady] = useState(false);
+  const [downloadProgress, setDownloadProgress] = useState(0);
+  const [isGenerating, setIsGenerating] = useState(false);
+
+  useEffect(() => {
+    const loadModel = async () => {
+      if (!modelSource) return;
+
+      try {
+        setIsReady(false);
+        const fileUri = await fetchResource(modelSource, setDownloadProgress);
+        await module.loadModule(fileUri);
+        setIsReady(true);
+      } catch (e) {
+        setError(getError(e));
+      }
+    };
+
+    loadModel();
+  }, [modelSource, module]);
+
+  const forward = async (input: string, classesOfInterest?: string[]) => {
+    if (!isReady) {
+      throw new Error(getError(ETError.ModuleNotLoaded));
+    }
+    if (isGenerating) {
+      throw new Error(getError(ETError.ModelGenerating));
+    }
+
+    try {
+      setIsGenerating(true);
+      const output = await module.forward(input, classesOfInterest || []);
+      return output;
+    } catch (e) {
+      throw new Error(getError(e));
+    } finally {
+      setIsGenerating(false);
+    }
+  };
 
   return { error, isReady, isGenerating, downloadProgress, forward };
 };
diff --git a/src/modules/computer_vision/BaseCVModule.ts b/src/modules/computer_vision/BaseCVModule.ts
index 2702a6f9..c61987d3 100644
--- a/src/modules/computer_vision/BaseCVModule.ts
+++ b/src/modules/computer_vision/BaseCVModule.ts
@@ -3,7 +3,6 @@ import {
   _StyleTransferModule,
   _ObjectDetectionModule,
   _ClassificationModule,
-  _ImageSegmentationModule,
 } from '../../native/RnExecutorchModules';
 import { getError } from '../../Error';
 
@@ -11,7 +10,6 @@ export class BaseCVModule extends BaseModule {
   static module:
     | _StyleTransferModule
     | _ObjectDetectionModule
-    | _ImageSegmentationModule
     | _ClassificationModule;
 
   static async forward(input: string) {
diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts
index eae124a8..041894c1 100644
--- a/src/modules/computer_vision/ImageSegmentationModule.ts
+++ b/src/modules/computer_vision/ImageSegmentationModule.ts
@@ -1,12 +1,19 @@
-import { BaseCVModule } from './BaseCVModule';
+import { BaseModule } from '../BaseModule';
 import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
+import { getError } from '../../Error';
 
-export class ImageSegmentationModule extends BaseCVModule {
+export class ImageSegmentationModule extends BaseModule {
   static module = new _ImageSegmentationModule();
 
-  static async forward(input: string) {
-    return await (super.forward(input) as ReturnType<
-      _ImageSegmentationModule['forward']
-    >);
+  static async forward(input: string, classesOfInteres?: string[]) {
+    console.log('# classes: ', classesOfInteres?.length);
+    try {
+      return await (this.module.forward(
+        input,
+        classesOfInteres || []
+      ) as ReturnType<_ImageSegmentationModule['forward']>);
+    } catch (e) {
+      throw new Error(getError(e));
+    }
   }
 }
diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts
index c65b4cb7..ccff2731 100644
--- a/src/native/NativeImageSegmentation.ts
+++ b/src/native/NativeImageSegmentation.ts
@@ -4,7 +4,10 @@ import { TurboModuleRegistry } from 'react-native';
 export interface Spec extends TurboModule {
   loadModule(modelSource: string): Promise<number>;
 
-  forward(input: string): Promise<{ [category: string]: number[] }>;
+  forward(
+    input: string,
+    classesOfInterest: string[]
+  ): Promise<{ [category: string]: number[] }>;
 }
 
 export default TurboModuleRegistry.get<Spec>('ImageSegmentation');
diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts
index c48c08a3..eb6e7087 100644
--- a/src/native/RnExecutorchModules.ts
+++ b/src/native/RnExecutorchModules.ts
@@ -132,9 +132,10 @@ const VerticalOCR = VerticalOCRSpec
 
 class _ImageSegmentationModule {
   async forward(
-    input: string
+    input: string,
+    classesOfInteres: string[]
   ): ReturnType<ImageSegmentationInterface['forward']> {
-    return await ImageSegmentation.forward(input);
+    return await ImageSegmentation.forward(input, classesOfInteres);
   }
   async loadModule(
     modelSource: string | number

From b0f1eb9c02c85b276926954f9df89a7197cc81d9 Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Wed, 5 Mar 2025 13:29:50 +0100
Subject: [PATCH 5/9] Clean up postprocess method

---
 .../ImageSegmentationModel.mm                 | 50 ++++++++++++-------
 1 file changed, 31 insertions(+), 19 deletions(-)

diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index 9c8959b2..de121b14 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -37,19 +37,10 @@ - (NSArray *)preprocess:(cv::Mat &)input {
   return modelInput;
 }
 
-- (NSDictionary *)postprocess:(NSArray *)output
-                  returnClasses:(NSArray *)classesOfInterest{
-  cv::Size modelImageSize = [self getModelImageSize];
-
-  std::size_t numLabels = deeplabv3_resnet50_labels.size();
+std::vector<cv::Mat> rescaleResults(NSArray *result, std::size_t numLabels, 
+                cv::Size modelImageSize, cv::Size originalSize) {
   std::size_t numModelPixels = modelImageSize.height * modelImageSize.width;
-  std::size_t numOriginalPixels = originalSize.height * originalSize.width;
-  std::size_t outputSize = (std::size_t)output.count;
-
-  NSAssert(outputSize ==  numLabels * numModelPixels, 
-        @"Model generated unexpected output size.");
 
-  // For each label extract it's matrix and rescale it to the original size
   std::vector<cv::Mat> resizedLabelScores(numLabels);
   for (std::size_t label = 0; label < numLabels; ++label) {
     cv::Mat labelMat = cv::Mat(modelImageSize, CV_64F);
@@ -57,30 +48,32 @@ - (NSDictionary *)postprocess:(NSArray *)output
     for(std::size_t pixel = 0; pixel < numModelPixels; ++pixel){
       int row = pixel / modelImageSize.width;
       int col = pixel % modelImageSize.width;
-      labelMat.at<double>(row, col) = [output[label * numModelPixels + pixel] doubleValue];
+      labelMat.at<double>(row, col) = [result[label * numModelPixels + pixel] doubleValue];
     }
 
     cv::resize(labelMat, resizedLabelScores[label], originalSize);
   }
+  return resizedLabelScores;
+}
 
-  cv::Mat maxArg = cv::Mat(originalSize, CV_32S);
-
-  // For each pixel apply softmax across all the labels
+void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& maxArg,
+                cv::Size originalSize, std::size_t numLabels) {
+  std::size_t numOriginalPixels = originalSize.height * originalSize.width;
   for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) {
     int row = pixel / originalSize.width;
     int col = pixel % originalSize.width;
     std::vector<double> scores;
     scores.reserve(numLabels);
-    for (const cv::Mat& mat : resizedLabelScores) {
+    for (const cv::Mat& mat : labelScores) {
       scores.push_back(mat.at<double>(row, col));
     }
-
+    
     std::vector<double> adjustedScores = softmax(scores);
-
+    
     std::size_t maxArgIndex = 0;
     double maxArgVal = 0;
     for (std::size_t label = 0; label < numLabels; ++label) {
-      resizedLabelScores[label].at<double>(row, col) = adjustedScores[label];
+      labelScores[label].at<double>(row, col) = adjustedScores[label];
       if (adjustedScores[label] > maxArgVal) {
         maxArgIndex = label;
         maxArgVal = adjustedScores[label];
@@ -89,6 +82,25 @@ - (NSDictionary *)postprocess:(NSArray *)output
 
     maxArg.at<int>(row, col) = maxArgIndex;
   }
+}
+
+- (NSDictionary *)postprocess:(NSArray *)output
+                  returnClasses:(NSArray *)classesOfInterest{
+  cv::Size modelImageSize = [self getModelImageSize];
+
+  std::size_t numLabels = deeplabv3_resnet50_labels.size();
+
+  NSAssert((std::size_t)output.count ==  numLabels * modelImageSize.height * modelImageSize.width, 
+        @"Model generated unexpected output size.");
+
+  // For each label extract it's matrix and rescale it to the original size
+  std::vector<cv::Mat> resizedLabelScores = 
+        rescaleResults(output, numLabels, modelImageSize, originalSize);
+
+  cv::Mat maxArg = cv::Mat(originalSize, CV_32S);
+
+  // For each pixel apply softmax across all the labels and calculate the maxArg
+  adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels);
 
   std::unordered_set<std::string> labelSet;
 

From 415c3e80c9c9f21553be8d8e20a17b2869fb2369 Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Fri, 7 Mar 2025 14:32:42 +0100
Subject: [PATCH 6/9] Move matrix functionality to utils

---
 .../ImageSegmentationModel.h                  | 15 +---------
 .../ImageSegmentationModel.mm                 |  7 +++--
 ios/RnExecutorch/utils/Conversions.h          | 15 ++++++++++
 .../computer_vision/useImageSegmentation.ts   | 28 ++++---------------
 .../ImageSegmentationModule.ts                |  1 -
 5 files changed, 26 insertions(+), 40 deletions(-)
 create mode 100644 ios/RnExecutorch/utils/Conversions.h

diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
index 91d1afc0..40b39583 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
@@ -6,17 +6,4 @@
 - (NSDictionary *)runModel:(cv::Mat &)input
                   returnClasses:(NSArray *)classesOfInterest;
 
-@end
-
-template <typename T>
-NSArray* matToNSArray(const cv::Mat& mat) {
-    std::size_t numPixels = mat.rows * mat.cols;
-    NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels];
-
-    for (std::size_t x = 0; x < mat.rows; ++x) {
-        for (std::size_t y = 0; y < mat.cols; ++y) {
-            arr[x * mat.cols + y] = @(mat.at<T>(x, y));
-        }
-    }
-    return arr;
-}
\ No newline at end of file
+@end
\ No newline at end of file
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index de121b14..1dc9fb7b 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -2,6 +2,7 @@
 #import <unordered_set>
 #import "../../utils/ImageProcessor.h"
 #import "../../utils/Numerical.h"
+#import "../../utils/Conversions.h"
 #import "opencv2/opencv.hpp"
 #import "Constants.h"
 
@@ -64,7 +65,7 @@ void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& maxArg,
     int col = pixel % originalSize.width;
     std::vector<double> scores;
     scores.reserve(numLabels);
-    for (const cv::Mat& mat : labelScores) {
+    for (const auto& mat : labelScores) {
       scores.push_back(mat.at<double>(row, col));
     }
     
@@ -114,12 +115,12 @@ - (NSDictionary *)postprocess:(NSArray *)output
   for (std::size_t label = 0; label < numLabels; ++label) {
     if (labelSet.contains(deeplabv3_resnet50_labels[label])){
         NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str());
-        NSArray *arr = matToNSArray<double>(resizedLabelScores[label]);
+        NSArray *arr = simpleMatToNSArray<double>(resizedLabelScores[label]);
         result[labelString] = arr;
     }
   }
 
-  result[@"argmax"] = matToNSArray<int>(maxArg);
+  result[@"argmax"] = simpleMatToNSArray<int>(maxArg);
 
   return result;
 }
diff --git a/ios/RnExecutorch/utils/Conversions.h b/ios/RnExecutorch/utils/Conversions.h
new file mode 100644
index 00000000..a83ec5fb
--- /dev/null
+++ b/ios/RnExecutorch/utils/Conversions.h
@@ -0,0 +1,15 @@
+#import "opencv2/opencv.hpp"
+
+// Convert a matrix containing a single value per cell to a NSArray
+template <typename T>
+NSArray* simpleMatToNSArray(const cv::Mat& mat) {
+    std::size_t numPixels = mat.rows * mat.cols;
+    NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels];
+
+    for (std::size_t x = 0; x < mat.rows; ++x) {
+        for (std::size_t y = 0; y < mat.cols; ++y) {
+            arr[x * mat.cols + y] = @(mat.at<T>(x, y));
+        }
+    }
+    return arr;
+}
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
index 5b583e5e..7b650d30 100644
--- a/src/hooks/computer_vision/useImageSegmentation.ts
+++ b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -1,7 +1,7 @@
-import { useState, useEffect } from 'react';
+import { useState } from 'react';
 import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
-import { fetchResource } from '../../utils/fetchResource';
 import { ETError, getError } from '../../Error';
+import { useModule } from '../useModule';
 
 interface Props {
   modelSource: string | number;
@@ -20,27 +20,11 @@ export const useImageSegmentation = ({
   ) => Promise<{ [category: string]: number[] }>;
 } => {
   const [module, _] = useState(() => new _ImageSegmentationModule());
-  const [error, setError] = useState<null | string>(null);
-  const [isReady, setIsReady] = useState(false);
-  const [downloadProgress, setDownloadProgress] = useState(0);
   const [isGenerating, setIsGenerating] = useState(false);
-
-  useEffect(() => {
-    const loadModel = async () => {
-      if (!modelSource) return;
-
-      try {
-        setIsReady(false);
-        const fileUri = await fetchResource(modelSource, setDownloadProgress);
-        await module.loadModule(fileUri);
-        setIsReady(true);
-      } catch (e) {
-        setError(getError(e));
-      }
-    };
-
-    loadModel();
-  }, [modelSource, module]);
+  const { error, isReady, downloadProgress } = useModule({
+    modelSource,
+    module,
+  });
 
   const forward = async (input: string, classesOfInterest?: string[]) => {
     if (!isReady) {
diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts
index 041894c1..f6646feb 100644
--- a/src/modules/computer_vision/ImageSegmentationModule.ts
+++ b/src/modules/computer_vision/ImageSegmentationModule.ts
@@ -6,7 +6,6 @@ export class ImageSegmentationModule extends BaseModule {
   static module = new _ImageSegmentationModule();
 
   static async forward(input: string, classesOfInteres?: string[]) {
-    console.log('# classes: ', classesOfInteres?.length);
     try {
       return await (this.module.forward(
         input,

From 6da0ced2e78c2690bb5d65c4d8ba59309957687c Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Mon, 10 Mar 2025 11:34:30 +0100
Subject: [PATCH 7/9] Add label enum to segmentation I/O

---
 .../ImageSegmentationModel.mm                 | 23 ++++++++----------
 .../image_segmentation/image_segmentation.ts  | 23 ------------------
 .../computer_vision/useImageSegmentation.ts   | 23 ++++++++++++++----
 src/index.tsx                                 |  1 +
 .../ImageSegmentationModule.ts                |  9 ++++---
 src/types/image_segmentation.ts               | 24 +++++++++++++++++++
 6 files changed, 57 insertions(+), 46 deletions(-)
 delete mode 100644 src/constants/image_segmentation/image_segmentation.ts
 create mode 100644 src/types/image_segmentation.ts

diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index 1dc9fb7b..7aa10a9f 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -1,6 +1,8 @@
 #import "ImageSegmentationModel.h"
 #import <unordered_set>
-#import "../../utils/ImageProcessor.h"
+#import <algorithm>
+#import <vector>
+#i\port "../../utils/ImageProcessor.h"
 #import "../../utils/Numerical.h"
 #import "../../utils/Conversions.h"
 #import "opencv2/opencv.hpp"
@@ -57,7 +59,7 @@ - (NSArray *)preprocess:(cv::Mat &)input {
   return resizedLabelScores;
 }
 
-void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& maxArg,
+void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& argMax,
                 cv::Size originalSize, std::size_t numLabels) {
   std::size_t numOriginalPixels = originalSize.height * originalSize.width;
   for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) {
@@ -71,17 +73,12 @@ void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& maxArg,
     
     std::vector<double> adjustedScores = softmax(scores);
     
-    std::size_t maxArgIndex = 0;
-    double maxArgVal = 0;
     for (std::size_t label = 0; label < numLabels; ++label) {
       labelScores[label].at<double>(row, col) = adjustedScores[label];
-      if (adjustedScores[label] > maxArgVal) {
-        maxArgIndex = label;
-        maxArgVal = adjustedScores[label];
-      }
     }
 
-    maxArg.at<int>(row, col) = maxArgIndex;
+    auto maxIt = std::max_element(scores.begin(), scores.end());
+    argMax.at<int>(row, col) = std::distance(scores.begin(), maxIt);
   }
 }
 
@@ -98,10 +95,10 @@ - (NSDictionary *)postprocess:(NSArray *)output
   std::vector<cv::Mat> resizedLabelScores = 
         rescaleResults(output, numLabels, modelImageSize, originalSize);
 
-  cv::Mat maxArg = cv::Mat(originalSize, CV_32S);
+  cv::Mat argMax = cv::Mat(originalSize, CV_32S);
 
-  // For each pixel apply softmax across all the labels and calculate the maxArg
-  adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels);
+  // For each pixel apply softmax across all the labels and calculate the argMax
+  adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels);
 
   std::unordered_set<std::string> labelSet;
 
@@ -120,7 +117,7 @@ - (NSDictionary *)postprocess:(NSArray *)output
     }
   }
 
-  result[@"argmax"] = simpleMatToNSArray<int>(maxArg);
+  result[@"argmax"] = simpleMatToNSArray<int>(argMax);
 
   return result;
 }
diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts
deleted file mode 100644
index b2f15f44..00000000
--- a/src/constants/image_segmentation/image_segmentation.ts
+++ /dev/null
@@ -1,23 +0,0 @@
-export const classLabels = new Map<number, string>([
-  [0, 'background'],
-  [1, 'aeroplane'],
-  [2, 'bicycle'],
-  [3, 'bird'],
-  [4, 'boat'],
-  [5, 'bottle'],
-  [6, 'bus'],
-  [7, 'car'],
-  [8, 'cat'],
-  [9, 'chair'],
-  [10, 'cow'],
-  [11, 'diningtable'],
-  [12, 'dog'],
-  [13, 'horse'],
-  [14, 'motorbike'],
-  [15, 'person'],
-  [16, 'pottedplant'],
-  [17, 'sheep'],
-  [18, 'sofa'],
-  [19, 'train'],
-  [20, 'tvmonitor'],
-]);
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
index 7b650d30..43bd98f1 100644
--- a/src/hooks/computer_vision/useImageSegmentation.ts
+++ b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -2,6 +2,7 @@ import { useState } from 'react';
 import { _ImageSegmentationModule } from '../../native/RnExecutorchModules';
 import { ETError, getError } from '../../Error';
 import { useModule } from '../useModule';
+import { DeeplabLabel } from '../../types/image_segmentation';
 
 interface Props {
   modelSource: string | number;
@@ -16,8 +17,8 @@ export const useImageSegmentation = ({
   downloadProgress: number;
   forward: (
     input: string,
-    classesOfInterest?: string[]
-  ) => Promise<{ [category: string]: number[] }>;
+    classesOfInterest?: DeeplabLabel[]
+  ) => Promise<{ [key in DeeplabLabel]?: number[] }>;
 } => {
   const [module, _] = useState(() => new _ImageSegmentationModule());
   const [isGenerating, setIsGenerating] = useState(false);
@@ -26,7 +27,7 @@ export const useImageSegmentation = ({
     module,
   });
 
-  const forward = async (input: string, classesOfInterest?: string[]) => {
+  const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => {
     if (!isReady) {
       throw new Error(getError(ETError.ModuleNotLoaded));
     }
@@ -36,8 +37,20 @@ export const useImageSegmentation = ({
 
     try {
       setIsGenerating(true);
-      const output = await module.forward(input, classesOfInterest || []);
-      return output;
+      const stringDict = await module.forward(
+        input,
+        (classesOfInterest || []).map((label) => DeeplabLabel[label])
+      );
+
+      let enumDict: { [key in DeeplabLabel]?: number[] } = {};
+
+      for (const key in stringDict) {
+        if (key in DeeplabLabel) {
+          const enumKey = DeeplabLabel[key as keyof typeof DeeplabLabel];
+          enumDict[enumKey] = stringDict[key];
+        }
+      }
+      return enumDict;
     } catch (e) {
       throw new Error(getError(e));
     } finally {
diff --git a/src/index.tsx b/src/index.tsx
index 8d322696..c4ae2f55 100644
--- a/src/index.tsx
+++ b/src/index.tsx
@@ -30,6 +30,7 @@ export * from './utils/listDownloadedResources';
 // types
 export * from './types/object_detection';
 export * from './types/ocr';
+export * from './types/image_segmentation';
 
 // constants
 export * from './constants/modelUrls';
diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts
index f6646feb..f2a6a167 100644
--- a/src/modules/computer_vision/ImageSegmentationModule.ts
+++ b/src/modules/computer_vision/ImageSegmentationModule.ts
@@ -5,12 +5,11 @@ import { getError } from '../../Error';
 export class ImageSegmentationModule extends BaseModule {
   static module = new _ImageSegmentationModule();
 
-  static async forward(input: string, classesOfInteres?: string[]) {
+  static async forward(input: string, classesOfInterest: string[]) {
     try {
-      return await (this.module.forward(
-        input,
-        classesOfInteres || []
-      ) as ReturnType<_ImageSegmentationModule['forward']>);
+      return await (this.module.forward(input, classesOfInterest) as ReturnType<
+        _ImageSegmentationModule['forward']
+      >);
     } catch (e) {
       throw new Error(getError(e));
     }
diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts
new file mode 100644
index 00000000..7d03d517
--- /dev/null
+++ b/src/types/image_segmentation.ts
@@ -0,0 +1,24 @@
+export enum DeeplabLabel {
+  background,
+  aeroplane,
+  bicycle,
+  bird,
+  boat,
+  bottle,
+  bus,
+  car,
+  cat,
+  chair,
+  cow,
+  diningtable,
+  dog,
+  horse,
+  motorbike,
+  person,
+  pottedplant,
+  sheep,
+  sofa,
+  train,
+  tvmonitor,
+  argmax, // Additional label not present in the model
+}

From d8d36df7c93bc1e78530816b381ccead2616872e Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Mon, 10 Mar 2025 12:30:04 +0100
Subject: [PATCH 8/9] Add optionality for segmentation output resize

---
 ios/RnExecutorch/ImageSegmentation.mm         |  9 ++--
 .../ImageSegmentationModel.h                  |  3 +-
 .../ImageSegmentationModel.mm                 | 46 ++++++++++++-------
 .../computer_vision/useImageSegmentation.ts   | 12 +++--
 .../ImageSegmentationModule.ts                | 14 ++++--
 src/native/NativeImageSegmentation.ts         |  3 +-
 src/native/RnExecutorchModules.ts             |  5 +-
 7 files changed, 61 insertions(+), 31 deletions(-)

diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm
index cf9366d5..19cbe664 100644
--- a/ios/RnExecutorch/ImageSegmentation.mm
+++ b/ios/RnExecutorch/ImageSegmentation.mm
@@ -35,13 +35,16 @@ - (void)loadModule:(NSString *)modelSource
 
 - (void)forward:(NSString *)input
         classesOfInterest:(NSArray *)classesOfInterest
+        resize:(BOOL)resize
         resolve:(RCTPromiseResolveBlock)resolve
-         reject:(RCTPromiseRejectBlock)reject {
+        reject:(RCTPromiseRejectBlock)reject {
 
   @try {
     cv::Mat image = [ImageProcessor readImage:input];
-    NSDictionary *result = [model runModel:image returnClasses:classesOfInterest];
-
+    NSDictionary *result = [model runModel:image 
+                                  returnClasses:classesOfInterest
+                                  resize:resize];
+                                  
     resolve(result);
     return;
   } @catch (NSException *exception) {
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
index 40b39583..a58733a1 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h
@@ -4,6 +4,7 @@
 @interface ImageSegmentationModel : BaseModel
 - (cv::Size)getModelImageSize;
 - (NSDictionary *)runModel:(cv::Mat &)input
-                  returnClasses:(NSArray *)classesOfInterest;
+                  returnClasses:(NSArray *)classesOfInterest
+                  resize:(BOOL)resize;
 
 @end
\ No newline at end of file
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index 7aa10a9f..70638bd4 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -2,7 +2,7 @@
 #import <unordered_set>
 #import <algorithm>
 #import <vector>
-#i\port "../../utils/ImageProcessor.h"
+#import "../../utils/ImageProcessor.h"
 #import "../../utils/Numerical.h"
 #import "../../utils/Conversions.h"
 #import "opencv2/opencv.hpp"
@@ -11,7 +11,8 @@
 @interface ImageSegmentationModel ()
   - (NSArray *)preprocess:(cv::Mat &)input;
   - (NSDictionary *)postprocess:(NSArray *)output
-                    returnClasses:(NSArray *)classesOfInterest;
+                    returnClasses:(NSArray *)classesOfInterest
+                    resize:(BOOL)resize;
 @end
 
 @implementation ImageSegmentationModel {
@@ -40,8 +41,8 @@ - (NSArray *)preprocess:(cv::Mat &)input {
   return modelInput;
 }
 
-std::vector<cv::Mat> rescaleResults(NSArray *result, std::size_t numLabels, 
-                cv::Size modelImageSize, cv::Size originalSize) {
+std::vector<cv::Mat> extractResults(NSArray *result, std::size_t numLabels, 
+                cv::Size modelImageSize, cv::Size originalSize, BOOL resize) {
   std::size_t numModelPixels = modelImageSize.height * modelImageSize.width;
 
   std::vector<cv::Mat> resizedLabelScores(numLabels);
@@ -54,17 +55,22 @@ - (NSArray *)preprocess:(cv::Mat &)input {
       labelMat.at<double>(row, col) = [result[label * numModelPixels + pixel] doubleValue];
     }
 
-    cv::resize(labelMat, resizedLabelScores[label], originalSize);
+    if (resize) {
+      cv::resize(labelMat, resizedLabelScores[label], originalSize);
+    }
+    else {
+      resizedLabelScores[label] = std::move(labelMat);
+    }
   }
   return resizedLabelScores;
 }
 
 void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& argMax,
-                cv::Size originalSize, std::size_t numLabels) {
-  std::size_t numOriginalPixels = originalSize.height * originalSize.width;
-  for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) {
-    int row = pixel / originalSize.width;
-    int col = pixel % originalSize.width;
+                cv::Size outputSize, std::size_t numLabels) {
+  std::size_t numOutputPixels = outputSize.height * outputSize.width;
+  for (std::size_t pixel = 0; pixel < numOutputPixels; ++pixel) {
+    int row = pixel / outputSize.width;
+    int col = pixel % outputSize.width;
     std::vector<double> scores;
     scores.reserve(numLabels);
     for (const auto& mat : labelScores) {
@@ -83,7 +89,8 @@ void adjustScoresPerPixel(std::vector<cv::Mat>& labelScores, cv::Mat& argMax,
 }
 
 - (NSDictionary *)postprocess:(NSArray *)output
-                  returnClasses:(NSArray *)classesOfInterest{
+                  returnClasses:(NSArray *)classesOfInterest
+                  resize:(BOOL)resize {
   cv::Size modelImageSize = [self getModelImageSize];
 
   std::size_t numLabels = deeplabv3_resnet50_labels.size();
@@ -91,14 +98,16 @@ - (NSDictionary *)postprocess:(NSArray *)output
   NSAssert((std::size_t)output.count ==  numLabels * modelImageSize.height * modelImageSize.width, 
         @"Model generated unexpected output size.");
 
-  // For each label extract it's matrix and rescale it to the original size
+  // For each label extract it's matrix,
+  // and rescale it to the original size if `resize`
   std::vector<cv::Mat> resizedLabelScores = 
-        rescaleResults(output, numLabels, modelImageSize, originalSize);
+        extractResults(output, numLabels, modelImageSize, originalSize, resize);
 
-  cv::Mat argMax = cv::Mat(originalSize, CV_32S);
+  cv::Size outputSize = resize ? originalSize : modelImageSize;
+  cv::Mat argMax = cv::Mat(outputSize, CV_32S);
 
   // For each pixel apply softmax across all the labels and calculate the argMax
-  adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels);
+  adjustScoresPerPixel(resizedLabelScores, argMax, outputSize, numLabels);
 
   std::unordered_set<std::string> labelSet;
 
@@ -123,11 +132,14 @@ - (NSDictionary *)postprocess:(NSArray *)output
 }
 
 - (NSDictionary *)runModel:(cv::Mat &)input
-                  returnClasses:(NSArray *)classesOfInterest {
+                  returnClasses:(NSArray *)classesOfInterest
+                  resize:(BOOL)resize {
   NSArray *modelInput = [self preprocess:input];
   NSArray *result = [self forward:modelInput];
 
-  NSDictionary *output = [self postprocess:result[0] returnClasses:classesOfInterest];
+  NSDictionary *output = [self postprocess:result[0]
+                               returnClasses:classesOfInterest
+                               resize:resize];
 
   return output;
 }
diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts
index 43bd98f1..4e562d6b 100644
--- a/src/hooks/computer_vision/useImageSegmentation.ts
+++ b/src/hooks/computer_vision/useImageSegmentation.ts
@@ -17,7 +17,8 @@ export const useImageSegmentation = ({
   downloadProgress: number;
   forward: (
     input: string,
-    classesOfInterest?: DeeplabLabel[]
+    classesOfInterest?: DeeplabLabel[],
+    resize?: boolean
   ) => Promise<{ [key in DeeplabLabel]?: number[] }>;
 } => {
   const [module, _] = useState(() => new _ImageSegmentationModule());
@@ -27,7 +28,11 @@ export const useImageSegmentation = ({
     module,
   });
 
-  const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => {
+  const forward = async (
+    input: string,
+    classesOfInterest?: DeeplabLabel[],
+    resize?: boolean
+  ) => {
     if (!isReady) {
       throw new Error(getError(ETError.ModuleNotLoaded));
     }
@@ -39,7 +44,8 @@ export const useImageSegmentation = ({
       setIsGenerating(true);
       const stringDict = await module.forward(
         input,
-        (classesOfInterest || []).map((label) => DeeplabLabel[label])
+        (classesOfInterest || []).map((label) => DeeplabLabel[label]),
+        resize || false
       );
 
       let enumDict: { [key in DeeplabLabel]?: number[] } = {};
diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts
index f2a6a167..1d078c1c 100644
--- a/src/modules/computer_vision/ImageSegmentationModule.ts
+++ b/src/modules/computer_vision/ImageSegmentationModule.ts
@@ -5,11 +5,17 @@ import { getError } from '../../Error';
 export class ImageSegmentationModule extends BaseModule {
   static module = new _ImageSegmentationModule();
 
-  static async forward(input: string, classesOfInterest: string[]) {
+  static async forward(
+    input: string,
+    classesOfInterest: string[],
+    resize: boolean
+  ) {
     try {
-      return await (this.module.forward(input, classesOfInterest) as ReturnType<
-        _ImageSegmentationModule['forward']
-      >);
+      return await (this.module.forward(
+        input,
+        classesOfInterest,
+        resize
+      ) as ReturnType<_ImageSegmentationModule['forward']>);
     } catch (e) {
       throw new Error(getError(e));
     }
diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts
index ccff2731..c66c8743 100644
--- a/src/native/NativeImageSegmentation.ts
+++ b/src/native/NativeImageSegmentation.ts
@@ -6,7 +6,8 @@ export interface Spec extends TurboModule {
 
   forward(
     input: string,
-    classesOfInterest: string[]
+    classesOfInterest: string[],
+    resize: boolean
   ): Promise<{ [category: string]: number[] }>;
 }
 
diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts
index eb6e7087..62ebd309 100644
--- a/src/native/RnExecutorchModules.ts
+++ b/src/native/RnExecutorchModules.ts
@@ -133,9 +133,10 @@ const VerticalOCR = VerticalOCRSpec
 class _ImageSegmentationModule {
   async forward(
     input: string,
-    classesOfInteres: string[]
+    classesOfInterest: string[],
+    resize: boolean
   ): ReturnType<ImageSegmentationInterface['forward']> {
-    return await ImageSegmentation.forward(input, classesOfInteres);
+    return await ImageSegmentation.forward(input, classesOfInterest, resize);
   }
   async loadModule(
     modelSource: string | number

From fbd1c85b994aa7cdba2381ecc67134fd8d2cf3fd Mon Sep 17 00:00:00 2001
From: Jakub Gonera <jakub.gonera@swmansion.com>
Date: Wed, 12 Mar 2025 15:12:41 +0100
Subject: [PATCH 9/9] Change segmentation enum values to upper case

---
 .../models/image_segmentation/Constants.mm    |  8 ++--
 .../ImageSegmentationModel.mm                 |  2 +-
 src/types/image_segmentation.ts               | 44 +++++++++----------
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.mm b/ios/RnExecutorch/models/image_segmentation/Constants.mm
index 4d98f34d..84ce9ea6 100644
--- a/ios/RnExecutorch/models/image_segmentation/Constants.mm
+++ b/ios/RnExecutorch/models/image_segmentation/Constants.mm
@@ -3,8 +3,8 @@
 #import <vector>
 
 const std::vector<std::string> deeplabv3_resnet50_labels = {
-    "background", "aeroplane", "bicycle", "bird", "boat",
-    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
-    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
-    "sofa", "train", "tvmonitor"
+    "BACKGROUND", "AEROPLANE", "BICYCLE", "BIRD", "BOAT",
+    "BOTTLE", "BUS", "CAR", "CAT", "CHAIR", "COW", "DININGTABLE",
+    "DOG", "HORSE", "MOTORBIKE", "PERSON", "POTTEDPLANT", "SHEEP",
+    "SOFA", "TRAIN", "TVMONITOR"
 };
\ No newline at end of file
diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
index 70638bd4..951687c5 100644
--- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
+++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm
@@ -126,7 +126,7 @@ - (NSDictionary *)postprocess:(NSArray *)output
     }
   }
 
-  result[@"argmax"] = simpleMatToNSArray<int>(argMax);
+  result[@"ARGMAX"] = simpleMatToNSArray<int>(argMax);
 
   return result;
 }
diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts
index 7d03d517..bc7d254d 100644
--- a/src/types/image_segmentation.ts
+++ b/src/types/image_segmentation.ts
@@ -1,24 +1,24 @@
 export enum DeeplabLabel {
-  background,
-  aeroplane,
-  bicycle,
-  bird,
-  boat,
-  bottle,
-  bus,
-  car,
-  cat,
-  chair,
-  cow,
-  diningtable,
-  dog,
-  horse,
-  motorbike,
-  person,
-  pottedplant,
-  sheep,
-  sofa,
-  train,
-  tvmonitor,
-  argmax, // Additional label not present in the model
+  BACKGROUND,
+  AEROPLANE,
+  BICYCLE,
+  BIRD,
+  BOAT,
+  BOTTLE,
+  BUS,
+  CAR,
+  CAT,
+  CHAIR,
+  COW,
+  DININGTABLE,
+  DOG,
+  HORSE,
+  MOTORBIKE,
+  PERSON,
+  POTTEDPLANT,
+  SHEEP,
+  SOFA,
+  TRAIN,
+  TVMONITOR,
+  ARGMAX, // Additional label not present in the model
 }