From 547a71840ddb0bda2b688426e2f0eebf8e00728a Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Thu, 27 Feb 2025 11:19:23 +0100 Subject: [PATCH 1/9] Add scaffolding for image segmentation native code --- ios/RnExecutorch/ImageSegmentation.h | 5 ++ ios/RnExecutorch/ImageSegmentation.mm | 59 +++++++++++++++++++ ios/RnExecutorch/StyleTransfer.mm | 2 +- .../ImageSegmentationModel.h | 5 ++ .../ImageSegmentationModel.mm | 6 ++ .../{ => style_transfer}/StyleTransferModel.h | 2 +- .../StyleTransferModel.mm | 2 +- .../computer_vision/useImageSegmentation.ts | 31 ++++++++++ src/index.tsx | 2 + src/modules/BaseModule.ts | 2 + .../ImageSegmentationModule.ts | 12 ++++ src/native/NativeImageSegmentation.ts | 10 ++++ src/native/RnExecutorchModules.ts | 29 +++++++++ 13 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 ios/RnExecutorch/ImageSegmentation.h create mode 100644 ios/RnExecutorch/ImageSegmentation.mm create mode 100644 ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h create mode 100644 ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm rename ios/RnExecutorch/models/{ => style_transfer}/StyleTransferModel.h (90%) rename ios/RnExecutorch/models/{ => style_transfer}/StyleTransferModel.mm (97%) create mode 100644 src/hooks/computer_vision/useImageSegmentation.ts create mode 100644 src/modules/computer_vision/ImageSegmentationModule.ts create mode 100644 src/native/NativeImageSegmentation.ts diff --git a/ios/RnExecutorch/ImageSegmentation.h b/ios/RnExecutorch/ImageSegmentation.h new file mode 100644 index 00000000..59ed56a4 --- /dev/null +++ b/ios/RnExecutorch/ImageSegmentation.h @@ -0,0 +1,5 @@ +#import + +@interface ImageSegmentation : NSObject + +@end \ No newline at end of file diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm new file mode 100644 index 00000000..ef526087 --- /dev/null +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -0,0 +1,59 @@ +#import "ImageSegmentation.h" +#import "models/image_segmentation/ImageSegmentationModel.h" +#import "models/BaseModel.h" +#import "utils/ETError.h" +#import +#import + +@implementation ImageSegmentation { + ImageSegmentationModel *model; +} + +RCT_EXPORT_MODULE() + +- (void)loadModule:(NSString *)modelSource + resolve:(RCTPromiseResolveBlock)resolve + reject:(RCTPromiseRejectBlock)reject { + + NSLog(@"Segmentation: loadModule"); + model = [[ImageSegmentationModel alloc] init]; + [model + loadModel:[NSURL URLWithString:modelSource] + completion:^(BOOL success, NSNumber *errorCode) { + if (success) { + resolve(errorCode); + return; + } + + reject(@"init_module_error", + [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]], + nil); + return; + }]; +} + +- (void)forward:(NSString *)input + resolve:(RCTPromiseResolveBlock)resolve + reject:(RCTPromiseRejectBlock)reject { + NSLog(@"Segmentation: forward"); +// @try { +// cv::Mat image = [ImageProcessor readImage:input]; +// cv::Mat resultImage = [model runModel:image]; + +// NSString *tempFilePath = [ImageProcessor saveToTempFile:resultImage]; +// resolve(tempFilePath); +// return; +// } @catch (NSException *exception) { +// NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason); +// reject(@"forward_error", +// [NSString stringWithFormat:@"%@", exception.reason], nil); +// return; +// } +} + +- (std::shared_ptr)getTurboModule: + (const facebook::react::ObjCTurboModule::InitParams &)params { + return std::make_shared(params); +} + +@end diff --git a/ios/RnExecutorch/StyleTransfer.mm b/ios/RnExecutorch/StyleTransfer.mm index 08e8d4a3..52930cd4 100644 --- a/ios/RnExecutorch/StyleTransfer.mm +++ b/ios/RnExecutorch/StyleTransfer.mm @@ -1,7 +1,7 @@ #import "StyleTransfer.h" #import "ImageProcessor.h" #import "models/BaseModel.h" -#import "models/StyleTransferModel.h" +#import "models/style_transfer/StyleTransferModel.h" #import "utils/ETError.h" #import #import diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h new file mode 100644 index 00000000..0e02a94c --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -0,0 +1,5 @@ +#import "../BaseModel.h" + +@interface ImageSegmentationModel : BaseModel + +@end diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm new file mode 100644 index 00000000..8212bdee --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -0,0 +1,6 @@ +#import "ImageSegmentationModel.h" + +@implementation ImageSegmentationModel { +} + +@end diff --git a/ios/RnExecutorch/models/StyleTransferModel.h b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h similarity index 90% rename from ios/RnExecutorch/models/StyleTransferModel.h rename to ios/RnExecutorch/models/style_transfer/StyleTransferModel.h index 1fd91d7b..20cdf6dd 100644 --- a/ios/RnExecutorch/models/StyleTransferModel.h +++ b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h @@ -1,4 +1,4 @@ -#import "BaseModel.h" +#import "../BaseModel.h" #import "opencv2/opencv.hpp" @interface StyleTransferModel : BaseModel diff --git a/ios/RnExecutorch/models/StyleTransferModel.mm b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm similarity index 97% rename from ios/RnExecutorch/models/StyleTransferModel.mm rename to ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm index 6051e24b..6a351431 100644 --- a/ios/RnExecutorch/models/StyleTransferModel.mm +++ b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm @@ -1,5 +1,5 @@ #import "StyleTransferModel.h" -#import "../utils/ImageProcessor.h" +#import "../../utils/ImageProcessor.h" #import "opencv2/opencv.hpp" @implementation StyleTransferModel { diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts new file mode 100644 index 00000000..2134b624 --- /dev/null +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -0,0 +1,31 @@ +import { useState } from 'react'; +import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; +import { useModule } from '../useModule'; + +interface Props { + modelSource: string | number; +} + +export const useImageSegmentation = ({ + modelSource, +}: Props): { + error: string | null; + isReady: boolean; + isGenerating: boolean; + downloadProgress: number; + forward: (input: string) => Promise; +} => { + const [module, _] = useState(() => new _ImageSegmentationModule()); + const { + error, + isReady, + isGenerating, + downloadProgress, + forwardImage: forward, + } = useModule({ + modelSource, + module, + }); + + return { error, isReady, isGenerating, downloadProgress, forward }; +}; diff --git a/src/index.tsx b/src/index.tsx index 7ae7a7ad..8d322696 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -2,6 +2,7 @@ export * from './hooks/computer_vision/useClassification'; export * from './hooks/computer_vision/useObjectDetection'; export * from './hooks/computer_vision/useStyleTransfer'; +export * from './hooks/computer_vision/useImageSegmentation'; export * from './hooks/computer_vision/useOCR'; export * from './hooks/computer_vision/useVerticalOCR'; @@ -14,6 +15,7 @@ export * from './hooks/general/useExecutorchModule'; export * from './modules/computer_vision/ClassificationModule'; export * from './modules/computer_vision/ObjectDetectionModule'; export * from './modules/computer_vision/StyleTransferModule'; +export * from './modules/computer_vision/ImageSegmentationModule'; export * from './modules/computer_vision/OCRModule'; export * from './modules/computer_vision/VerticalOCRModule'; diff --git a/src/modules/BaseModule.ts b/src/modules/BaseModule.ts index e977836f..56cf2e3d 100644 --- a/src/modules/BaseModule.ts +++ b/src/modules/BaseModule.ts @@ -1,4 +1,5 @@ import { + _ImageSegmentationModule, _StyleTransferModule, _ObjectDetectionModule, _ClassificationModule, @@ -10,6 +11,7 @@ import { getError } from '../Error'; export class BaseModule { static module: + | _ImageSegmentationModule | _StyleTransferModule | _ObjectDetectionModule | _ClassificationModule diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts new file mode 100644 index 00000000..eae124a8 --- /dev/null +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -0,0 +1,12 @@ +import { BaseCVModule } from './BaseCVModule'; +import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; + +export class ImageSegmentationModule extends BaseCVModule { + static module = new _ImageSegmentationModule(); + + static async forward(input: string) { + return await (super.forward(input) as ReturnType< + _ImageSegmentationModule['forward'] + >); + } +} diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts new file mode 100644 index 00000000..1dcc9c27 --- /dev/null +++ b/src/native/NativeImageSegmentation.ts @@ -0,0 +1,10 @@ +import type { TurboModule } from 'react-native'; +import { TurboModuleRegistry } from 'react-native'; + +export interface Spec extends TurboModule { + loadModule(modelSource: string): Promise; + + forward(input: string): Promise; +} + +export default TurboModuleRegistry.get('ImageSegmentation'); diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts index b1edcf52..c48c08a3 100644 --- a/src/native/RnExecutorchModules.ts +++ b/src/native/RnExecutorchModules.ts @@ -2,6 +2,7 @@ import { Platform } from 'react-native'; import { Spec as ClassificationInterface } from './NativeClassification'; import { Spec as ObjectDetectionInterface } from './NativeObjectDetection'; import { Spec as StyleTransferInterface } from './NativeStyleTransfer'; +import { Spec as ImageSegmentationInterface } from './NativeImageSegmentation'; import { Spec as ETModuleInterface } from './NativeETModule'; import { Spec as OCRInterface } from './NativeOCR'; import { Spec as VerticalOCRInterface } from './NativeVerticalOCR'; @@ -51,6 +52,19 @@ const Classification = ClassificationSpec } ); +const ImageSegmentationSpec = require('./NativeImageSegmentation').default; + +const ImageSegmentation = ImageSegmentationSpec + ? ImageSegmentationSpec + : new Proxy( + {}, + { + get() { + throw new Error(LINKING_ERROR); + }, + } + ); + const ObjectDetectionSpec = require('./NativeObjectDetection').default; const ObjectDetection = ObjectDetectionSpec @@ -116,6 +130,19 @@ const VerticalOCR = VerticalOCRSpec } ); +class _ImageSegmentationModule { + async forward( + input: string + ): ReturnType { + return await ImageSegmentation.forward(input); + } + async loadModule( + modelSource: string | number + ): ReturnType { + return await ImageSegmentation.loadModule(modelSource); + } +} + class _ObjectDetectionModule { async forward( input: string @@ -239,12 +266,14 @@ export { Classification, ObjectDetection, StyleTransfer, + ImageSegmentation, SpeechToText, OCR, VerticalOCR, _ETModule, _ClassificationModule, _StyleTransferModule, + _ImageSegmentationModule, _ObjectDetectionModule, _SpeechToTextModule, _OCRModule, From cda498682e415560c3a52d001108b12275b4c75f Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Tue, 4 Mar 2025 11:12:55 +0100 Subject: [PATCH 2/9] Add working ios-native model --- ios/RnExecutorch/ImageSegmentation.mm | 30 +++--- .../classification/ClassificationModel.mm | 2 +- .../models/image_segmentation/Constants.h | 5 + .../models/image_segmentation/Constants.mm | 10 ++ .../ImageSegmentationModel.h | 3 + .../ImageSegmentationModel.mm | 102 ++++++++++++++++++ .../Utils.h => utils/Numerical.h} | 0 .../Utils.mm => utils/Numerical.mm} | 0 .../computer_vision/useImageSegmentation.ts | 2 +- src/modules/computer_vision/BaseCVModule.ts | 2 + src/native/NativeImageSegmentation.ts | 2 +- 11 files changed, 140 insertions(+), 18 deletions(-) create mode 100644 ios/RnExecutorch/models/image_segmentation/Constants.h create mode 100644 ios/RnExecutorch/models/image_segmentation/Constants.mm rename ios/RnExecutorch/{models/classification/Utils.h => utils/Numerical.h} (100%) rename ios/RnExecutorch/{models/classification/Utils.mm => utils/Numerical.mm} (100%) diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm index ef526087..df972ab2 100644 --- a/ios/RnExecutorch/ImageSegmentation.mm +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -4,6 +4,8 @@ #import "utils/ETError.h" #import #import +#import +#import "ImageProcessor.h" @implementation ImageSegmentation { ImageSegmentationModel *model; @@ -15,7 +17,6 @@ - (void)loadModule:(NSString *)modelSource resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - NSLog(@"Segmentation: loadModule"); model = [[ImageSegmentationModel alloc] init]; [model loadModel:[NSURL URLWithString:modelSource] @@ -35,20 +36,19 @@ - (void)loadModule:(NSString *)modelSource - (void)forward:(NSString *)input resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - NSLog(@"Segmentation: forward"); -// @try { -// cv::Mat image = [ImageProcessor readImage:input]; -// cv::Mat resultImage = [model runModel:image]; - -// NSString *tempFilePath = [ImageProcessor saveToTempFile:resultImage]; -// resolve(tempFilePath); -// return; -// } @catch (NSException *exception) { -// NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason); -// reject(@"forward_error", -// [NSString stringWithFormat:@"%@", exception.reason], nil); -// return; -// } + + @try { + cv::Mat image = [ImageProcessor readImage:input]; + NSDictionary *result= [model runModel:image]; + + resolve(result); + return; + } @catch (NSException *exception) { + NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason); + reject(@"forward_error", + [NSString stringWithFormat:@"%@", exception.reason], nil); + return; + } } - (std::shared_ptr)getTurboModule: diff --git a/ios/RnExecutorch/models/classification/ClassificationModel.mm b/ios/RnExecutorch/models/classification/ClassificationModel.mm index 8e7973e2..0306e67c 100644 --- a/ios/RnExecutorch/models/classification/ClassificationModel.mm +++ b/ios/RnExecutorch/models/classification/ClassificationModel.mm @@ -1,7 +1,7 @@ #import "ClassificationModel.h" #import "../../utils/ImageProcessor.h" +#import "../../utils/Numerical.h" #import "Constants.h" -#import "Utils.h" #import "opencv2/opencv.hpp" @implementation ClassificationModel diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.h b/ios/RnExecutorch/models/image_segmentation/Constants.h new file mode 100644 index 00000000..889556d7 --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/Constants.h @@ -0,0 +1,5 @@ +#import +#import + + +extern const std::vector deeplabv3_resnet50_labels; diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.mm b/ios/RnExecutorch/models/image_segmentation/Constants.mm new file mode 100644 index 00000000..4d98f34d --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/Constants.mm @@ -0,0 +1,10 @@ +#import "Constants.h" +#import +#import + +const std::vector deeplabv3_resnet50_labels = { + "background", "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", + "dog", "horse", "motorbike", "person", "pottedplant", "sheep", + "sofa", "train", "tvmonitor" +}; \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 0e02a94c..66dfb4b6 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -1,5 +1,8 @@ #import "../BaseModel.h" +#import "opencv2/opencv.hpp" @interface ImageSegmentationModel : BaseModel +- (cv::Size)getModelImageSize; +- (NSDictionary *)runModel:(cv::Mat &)input; @end diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 8212bdee..f504cf12 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -1,6 +1,108 @@ #import "ImageSegmentationModel.h" +#import "../../utils/ImageProcessor.h" +#import "../../utils/Numerical.h" +#import "opencv2/opencv.hpp" +#import "Constants.h" + +@interface ImageSegmentationModel () + - (NSArray *)preprocess:(cv::Mat &)input; + - (NSDictionary *)postprocess:(NSArray *)output; +@end @implementation ImageSegmentationModel { + cv::Size originalSize; +} + +- (cv::Size)getModelImageSize { + NSArray *inputShape = [module getInputShape:@0]; + NSNumber *widthNumber = inputShape.lastObject; + NSNumber *heightNumber = inputShape[inputShape.count - 2]; + + int height = [heightNumber intValue]; + int width = [widthNumber intValue]; + + return cv::Size(height, width); +} + +- (NSArray *)preprocess:(cv::Mat &)input { + originalSize = cv::Size(input.cols, input.rows); + + cv::Size modelImageSize = [self getModelImageSize]; + cv::Mat output; + cv::resize(input, output, modelImageSize); + + NSArray *modelInput = [ImageProcessor matToNSArray:output]; + return modelInput; +} + +- (NSDictionary *)postprocess:(NSArray *)output { + cv::Size modelImageSize = [self getModelImageSize]; + + std::size_t numLabels = deeplabv3_resnet50_labels.size(); + std::size_t numModelPixels = modelImageSize.height * modelImageSize.width; + std::size_t numOriginalPixels = originalSize.height * originalSize.width; + std::size_t outputSize = (std::size_t)output.count; + + NSAssert(outputSize == numLabels * numModelPixels, + @"Model generated unexpected output size."); + + + // For each label extract it's matrix and rescale it to the original size + std::vector resizedLabelScores(numLabels); + for (std::size_t label = 0; label < numLabels; ++label) { + cv::Mat labelMat = cv::Mat(modelImageSize, CV_64F); + + for(std::size_t pixel = 0; pixel < numModelPixels; ++pixel){ + int row = pixel / modelImageSize.width; + int col = pixel % modelImageSize.width; + labelMat.at(row, col) = [output[label * numModelPixels + pixel] doubleValue]; + } + + cv::resize(labelMat, resizedLabelScores[label], originalSize); + } + + // For each pixel apply softmax across all the labels + for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { + int row = pixel / originalSize.width; + int col = pixel % originalSize.width; + std::vector scores; + scores.reserve(numLabels); + for (const cv::Mat& mat : resizedLabelScores) { + scores.push_back(mat.at(row, col)); + } + + std::vector adjustedScores = softmax(scores); + + for (std::size_t label = 0; label < numLabels; ++label) { + resizedLabelScores[label].at(row, col) = adjustedScores[label]; + } + } + + NSMutableDictionary *result = [NSMutableDictionary dictionary]; + + for (std::size_t label = 0; label < numLabels; ++label) { + NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); + NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numOriginalPixels]; + + for (std::size_t x = 0; x < originalSize.height; ++x) { + for (std::size_t y = 0; y < originalSize.width; ++y) { + arr[x * originalSize.width + y] = @(resizedLabelScores[label].at(x, y)); + } + } + + result[labelString] = arr; + } + + return result; +} + +- (NSDictionary *)runModel:(cv::Mat &)input { + NSArray *modelInput = [self preprocess:input]; + NSArray *result = [self forward:modelInput]; + + NSDictionary *output = [self postprocess:result[0]]; + + return output; } @end diff --git a/ios/RnExecutorch/models/classification/Utils.h b/ios/RnExecutorch/utils/Numerical.h similarity index 100% rename from ios/RnExecutorch/models/classification/Utils.h rename to ios/RnExecutorch/utils/Numerical.h diff --git a/ios/RnExecutorch/models/classification/Utils.mm b/ios/RnExecutorch/utils/Numerical.mm similarity index 100% rename from ios/RnExecutorch/models/classification/Utils.mm rename to ios/RnExecutorch/utils/Numerical.mm diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 2134b624..746a45e0 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -13,7 +13,7 @@ export const useImageSegmentation = ({ isReady: boolean; isGenerating: boolean; downloadProgress: number; - forward: (input: string) => Promise; + forward: (input: string) => Promise<{ [category: string]: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); const { diff --git a/src/modules/computer_vision/BaseCVModule.ts b/src/modules/computer_vision/BaseCVModule.ts index c61987d3..2702a6f9 100644 --- a/src/modules/computer_vision/BaseCVModule.ts +++ b/src/modules/computer_vision/BaseCVModule.ts @@ -3,6 +3,7 @@ import { _StyleTransferModule, _ObjectDetectionModule, _ClassificationModule, + _ImageSegmentationModule, } from '../../native/RnExecutorchModules'; import { getError } from '../../Error'; @@ -10,6 +11,7 @@ export class BaseCVModule extends BaseModule { static module: | _StyleTransferModule | _ObjectDetectionModule + | _ImageSegmentationModule | _ClassificationModule; static async forward(input: string) { diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts index 1dcc9c27..c65b4cb7 100644 --- a/src/native/NativeImageSegmentation.ts +++ b/src/native/NativeImageSegmentation.ts @@ -4,7 +4,7 @@ import { TurboModuleRegistry } from 'react-native'; export interface Spec extends TurboModule { loadModule(modelSource: string): Promise; - forward(input: string): Promise; + forward(input: string): Promise<{ [category: string]: number[] }>; } export default TurboModuleRegistry.get('ImageSegmentation'); From c51b89638bf66602fc0f652e9de0dce6fcb01f1f Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Tue, 4 Mar 2025 13:21:43 +0100 Subject: [PATCH 3/9] Add arg max map to the segmentation result --- .../ImageSegmentationModel.h | 13 +++++++++++ .../ImageSegmentationModel.mm | 23 +++++++++++-------- .../image_segmentation/image_segmentation.ts | 23 +++++++++++++++++++ 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 src/constants/image_segmentation/image_segmentation.ts diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 66dfb4b6..95535ba0 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -6,3 +6,16 @@ - (NSDictionary *)runModel:(cv::Mat &)input; @end + +template +NSArray* matToNSArray(const cv::Mat& mat) { + std::size_t numPixels = mat.rows * mat.cols; + NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels]; + + for (std::size_t x = 0; x < mat.rows; ++x) { + for (std::size_t y = 0; y < mat.cols; ++y) { + arr[x * mat.cols + y] = @(mat.at(x, y)); + } + } + return arr; +} \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index f504cf12..52707a4e 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -46,7 +46,6 @@ - (NSDictionary *)postprocess:(NSArray *)output { NSAssert(outputSize == numLabels * numModelPixels, @"Model generated unexpected output size."); - // For each label extract it's matrix and rescale it to the original size std::vector resizedLabelScores(numLabels); for (std::size_t label = 0; label < numLabels; ++label) { @@ -61,6 +60,8 @@ - (NSDictionary *)postprocess:(NSArray *)output { cv::resize(labelMat, resizedLabelScores[label], originalSize); } + cv::Mat maxArg = cv::Mat(originalSize, CV_32S); + // For each pixel apply softmax across all the labels for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { int row = pixel / originalSize.width; @@ -73,26 +74,30 @@ - (NSDictionary *)postprocess:(NSArray *)output { std::vector adjustedScores = softmax(scores); + std::size_t maxArgIndex = 0; + double maxArgVal = 0; for (std::size_t label = 0; label < numLabels; ++label) { resizedLabelScores[label].at(row, col) = adjustedScores[label]; + if (adjustedScores[label] > maxArgVal) { + maxArgIndex = label; + maxArgVal = adjustedScores[label]; + } } + + maxArg.at(row, col) = maxArgIndex; } NSMutableDictionary *result = [NSMutableDictionary dictionary]; + // Convert to NSArray and populate the final dictionary for (std::size_t label = 0; label < numLabels; ++label) { NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); - NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numOriginalPixels]; - - for (std::size_t x = 0; x < originalSize.height; ++x) { - for (std::size_t y = 0; y < originalSize.width; ++y) { - arr[x * originalSize.width + y] = @(resizedLabelScores[label].at(x, y)); - } - } - + NSMutableArray *arr = matToNSArray(resizedLabelScores[label]); result[labelString] = arr; } + result[@"argmax"] = matToNSArray(maxArg); + return result; } diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts new file mode 100644 index 00000000..b2f15f44 --- /dev/null +++ b/src/constants/image_segmentation/image_segmentation.ts @@ -0,0 +1,23 @@ +export const classLabels = new Map([ + [0, 'background'], + [1, 'aeroplane'], + [2, 'bicycle'], + [3, 'bird'], + [4, 'boat'], + [5, 'bottle'], + [6, 'bus'], + [7, 'car'], + [8, 'cat'], + [9, 'chair'], + [10, 'cow'], + [11, 'diningtable'], + [12, 'dog'], + [13, 'horse'], + [14, 'motorbike'], + [15, 'person'], + [16, 'pottedplant'], + [17, 'sheep'], + [18, 'sofa'], + [19, 'train'], + [20, 'tvmonitor'], +]); From 3d04da27351086de39ff520be91181794222eaf6 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 5 Mar 2025 12:14:27 +0100 Subject: [PATCH 4/9] Add a way to filter what segmentation classes are returned --- ios/RnExecutorch/ImageSegmentation.mm | 3 +- .../ImageSegmentationModel.h | 3 +- .../ImageSegmentationModel.mm | 28 ++++++--- .../computer_vision/useImageSegmentation.ts | 60 +++++++++++++++---- src/modules/computer_vision/BaseCVModule.ts | 2 - .../ImageSegmentationModule.ts | 19 ++++-- src/native/NativeImageSegmentation.ts | 5 +- src/native/RnExecutorchModules.ts | 5 +- 8 files changed, 91 insertions(+), 34 deletions(-) diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm index df972ab2..cf9366d5 100644 --- a/ios/RnExecutorch/ImageSegmentation.mm +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -34,12 +34,13 @@ - (void)loadModule:(NSString *)modelSource } - (void)forward:(NSString *)input + classesOfInterest:(NSArray *)classesOfInterest resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { @try { cv::Mat image = [ImageProcessor readImage:input]; - NSDictionary *result= [model runModel:image]; + NSDictionary *result = [model runModel:image returnClasses:classesOfInterest]; resolve(result); return; diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 95535ba0..91d1afc0 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -3,7 +3,8 @@ @interface ImageSegmentationModel : BaseModel - (cv::Size)getModelImageSize; -- (NSDictionary *)runModel:(cv::Mat &)input; +- (NSDictionary *)runModel:(cv::Mat &)input + returnClasses:(NSArray *)classesOfInterest; @end diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 52707a4e..9c8959b2 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -1,4 +1,5 @@ #import "ImageSegmentationModel.h" +#import #import "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" #import "opencv2/opencv.hpp" @@ -6,7 +7,8 @@ @interface ImageSegmentationModel () - (NSArray *)preprocess:(cv::Mat &)input; - - (NSDictionary *)postprocess:(NSArray *)output; + - (NSDictionary *)postprocess:(NSArray *)output + returnClasses:(NSArray *)classesOfInterest; @end @implementation ImageSegmentationModel { @@ -35,7 +37,8 @@ - (NSArray *)preprocess:(cv::Mat &)input { return modelInput; } -- (NSDictionary *)postprocess:(NSArray *)output { +- (NSDictionary *)postprocess:(NSArray *)output + returnClasses:(NSArray *)classesOfInterest{ cv::Size modelImageSize = [self getModelImageSize]; std::size_t numLabels = deeplabv3_resnet50_labels.size(); @@ -87,13 +90,21 @@ - (NSDictionary *)postprocess:(NSArray *)output { maxArg.at(row, col) = maxArgIndex; } + std::unordered_set labelSet; + + for (id label in classesOfInterest) { + labelSet.insert(std::string([label UTF8String])); + } + NSMutableDictionary *result = [NSMutableDictionary dictionary]; - + // Convert to NSArray and populate the final dictionary for (std::size_t label = 0; label < numLabels; ++label) { - NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); - NSMutableArray *arr = matToNSArray(resizedLabelScores[label]); - result[labelString] = arr; + if (labelSet.contains(deeplabv3_resnet50_labels[label])){ + NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); + NSArray *arr = matToNSArray(resizedLabelScores[label]); + result[labelString] = arr; + } } result[@"argmax"] = matToNSArray(maxArg); @@ -101,11 +112,12 @@ - (NSDictionary *)postprocess:(NSArray *)output { return result; } -- (NSDictionary *)runModel:(cv::Mat &)input { +- (NSDictionary *)runModel:(cv::Mat &)input + returnClasses:(NSArray *)classesOfInterest { NSArray *modelInput = [self preprocess:input]; NSArray *result = [self forward:modelInput]; - NSDictionary *output = [self postprocess:result[0]]; + NSDictionary *output = [self postprocess:result[0] returnClasses:classesOfInterest]; return output; } diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 746a45e0..5b583e5e 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -1,6 +1,7 @@ -import { useState } from 'react'; +import { useState, useEffect } from 'react'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; -import { useModule } from '../useModule'; +import { fetchResource } from '../../utils/fetchResource'; +import { ETError, getError } from '../../Error'; interface Props { modelSource: string | number; @@ -13,19 +14,52 @@ export const useImageSegmentation = ({ isReady: boolean; isGenerating: boolean; downloadProgress: number; - forward: (input: string) => Promise<{ [category: string]: number[] }>; + forward: ( + input: string, + classesOfInterest?: string[] + ) => Promise<{ [category: string]: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); - const { - error, - isReady, - isGenerating, - downloadProgress, - forwardImage: forward, - } = useModule({ - modelSource, - module, - }); + const [error, setError] = useState(null); + const [isReady, setIsReady] = useState(false); + const [downloadProgress, setDownloadProgress] = useState(0); + const [isGenerating, setIsGenerating] = useState(false); + + useEffect(() => { + const loadModel = async () => { + if (!modelSource) return; + + try { + setIsReady(false); + const fileUri = await fetchResource(modelSource, setDownloadProgress); + await module.loadModule(fileUri); + setIsReady(true); + } catch (e) { + setError(getError(e)); + } + }; + + loadModel(); + }, [modelSource, module]); + + const forward = async (input: string, classesOfInterest?: string[]) => { + if (!isReady) { + throw new Error(getError(ETError.ModuleNotLoaded)); + } + if (isGenerating) { + throw new Error(getError(ETError.ModelGenerating)); + } + + try { + setIsGenerating(true); + const output = await module.forward(input, classesOfInterest || []); + return output; + } catch (e) { + throw new Error(getError(e)); + } finally { + setIsGenerating(false); + } + }; return { error, isReady, isGenerating, downloadProgress, forward }; }; diff --git a/src/modules/computer_vision/BaseCVModule.ts b/src/modules/computer_vision/BaseCVModule.ts index 2702a6f9..c61987d3 100644 --- a/src/modules/computer_vision/BaseCVModule.ts +++ b/src/modules/computer_vision/BaseCVModule.ts @@ -3,7 +3,6 @@ import { _StyleTransferModule, _ObjectDetectionModule, _ClassificationModule, - _ImageSegmentationModule, } from '../../native/RnExecutorchModules'; import { getError } from '../../Error'; @@ -11,7 +10,6 @@ export class BaseCVModule extends BaseModule { static module: | _StyleTransferModule | _ObjectDetectionModule - | _ImageSegmentationModule | _ClassificationModule; static async forward(input: string) { diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index eae124a8..041894c1 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -1,12 +1,19 @@ -import { BaseCVModule } from './BaseCVModule'; +import { BaseModule } from '../BaseModule'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; +import { getError } from '../../Error'; -export class ImageSegmentationModule extends BaseCVModule { +export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); - static async forward(input: string) { - return await (super.forward(input) as ReturnType< - _ImageSegmentationModule['forward'] - >); + static async forward(input: string, classesOfInteres?: string[]) { + console.log('# classes: ', classesOfInteres?.length); + try { + return await (this.module.forward( + input, + classesOfInteres || [] + ) as ReturnType<_ImageSegmentationModule['forward']>); + } catch (e) { + throw new Error(getError(e)); + } } } diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts index c65b4cb7..ccff2731 100644 --- a/src/native/NativeImageSegmentation.ts +++ b/src/native/NativeImageSegmentation.ts @@ -4,7 +4,10 @@ import { TurboModuleRegistry } from 'react-native'; export interface Spec extends TurboModule { loadModule(modelSource: string): Promise; - forward(input: string): Promise<{ [category: string]: number[] }>; + forward( + input: string, + classesOfInterest: string[] + ): Promise<{ [category: string]: number[] }>; } export default TurboModuleRegistry.get('ImageSegmentation'); diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts index c48c08a3..eb6e7087 100644 --- a/src/native/RnExecutorchModules.ts +++ b/src/native/RnExecutorchModules.ts @@ -132,9 +132,10 @@ const VerticalOCR = VerticalOCRSpec class _ImageSegmentationModule { async forward( - input: string + input: string, + classesOfInteres: string[] ): ReturnType { - return await ImageSegmentation.forward(input); + return await ImageSegmentation.forward(input, classesOfInteres); } async loadModule( modelSource: string | number From b0f1eb9c02c85b276926954f9df89a7197cc81d9 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 5 Mar 2025 13:29:50 +0100 Subject: [PATCH 5/9] Cleanup postprocess method --- .../ImageSegmentationModel.mm | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 9c8959b2..de121b14 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -37,19 +37,10 @@ - (NSArray *)preprocess:(cv::Mat &)input { return modelInput; } -- (NSDictionary *)postprocess:(NSArray *)output - returnClasses:(NSArray *)classesOfInterest{ - cv::Size modelImageSize = [self getModelImageSize]; - - std::size_t numLabels = deeplabv3_resnet50_labels.size(); +std::vector rescaleResults(NSArray *result, std::size_t numLabels, + cv::Size modelImageSize, cv::Size originalSize) { std::size_t numModelPixels = modelImageSize.height * modelImageSize.width; - std::size_t numOriginalPixels = originalSize.height * originalSize.width; - std::size_t outputSize = (std::size_t)output.count; - - NSAssert(outputSize == numLabels * numModelPixels, - @"Model generated unexpected output size."); - // For each label extract it's matrix and rescale it to the original size std::vector resizedLabelScores(numLabels); for (std::size_t label = 0; label < numLabels; ++label) { cv::Mat labelMat = cv::Mat(modelImageSize, CV_64F); @@ -57,30 +48,32 @@ - (NSDictionary *)postprocess:(NSArray *)output for(std::size_t pixel = 0; pixel < numModelPixels; ++pixel){ int row = pixel / modelImageSize.width; int col = pixel % modelImageSize.width; - labelMat.at(row, col) = [output[label * numModelPixels + pixel] doubleValue]; + labelMat.at(row, col) = [result[label * numModelPixels + pixel] doubleValue]; } cv::resize(labelMat, resizedLabelScores[label], originalSize); } + return resizedLabelScores; +} - cv::Mat maxArg = cv::Mat(originalSize, CV_32S); - - // For each pixel apply softmax across all the labels +void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, + cv::Size originalSize, std::size_t numLabels) { + std::size_t numOriginalPixels = originalSize.height * originalSize.width; for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { int row = pixel / originalSize.width; int col = pixel % originalSize.width; std::vector scores; scores.reserve(numLabels); - for (const cv::Mat& mat : resizedLabelScores) { + for (const cv::Mat& mat : labelScores) { scores.push_back(mat.at(row, col)); } - + std::vector adjustedScores = softmax(scores); - + std::size_t maxArgIndex = 0; double maxArgVal = 0; for (std::size_t label = 0; label < numLabels; ++label) { - resizedLabelScores[label].at(row, col) = adjustedScores[label]; + labelScores[label].at(row, col) = adjustedScores[label]; if (adjustedScores[label] > maxArgVal) { maxArgIndex = label; maxArgVal = adjustedScores[label]; @@ -89,6 +82,25 @@ - (NSDictionary *)postprocess:(NSArray *)output maxArg.at(row, col) = maxArgIndex; } +} + +- (NSDictionary *)postprocess:(NSArray *)output + returnClasses:(NSArray *)classesOfInterest{ + cv::Size modelImageSize = [self getModelImageSize]; + + std::size_t numLabels = deeplabv3_resnet50_labels.size(); + + NSAssert((std::size_t)output.count == numLabels * modelImageSize.height * modelImageSize.width, + @"Model generated unexpected output size."); + + // For each label extract it's matrix and rescale it to the original size + std::vector resizedLabelScores = + rescaleResults(output, numLabels, modelImageSize, originalSize); + + cv::Mat maxArg = cv::Mat(originalSize, CV_32S); + + // For each pixel apply softmax across all the labels and calculate the maxArg + adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels); std::unordered_set labelSet; From 415c3e80c9c9f21553be8d8e20a17b2869fb2369 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Fri, 7 Mar 2025 14:32:42 +0100 Subject: [PATCH 6/9] Move matrix functionality to utils --- .../ImageSegmentationModel.h | 15 +--------- .../ImageSegmentationModel.mm | 7 +++-- ios/RnExecutorch/utils/Conversions.h | 15 ++++++++++ .../computer_vision/useImageSegmentation.ts | 28 ++++--------------- .../ImageSegmentationModule.ts | 1 - 5 files changed, 26 insertions(+), 40 deletions(-) create mode 100644 ios/RnExecutorch/utils/Conversions.h diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 91d1afc0..40b39583 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -6,17 +6,4 @@ - (NSDictionary *)runModel:(cv::Mat &)input returnClasses:(NSArray *)classesOfInterest; -@end - -template -NSArray* matToNSArray(const cv::Mat& mat) { - std::size_t numPixels = mat.rows * mat.cols; - NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels]; - - for (std::size_t x = 0; x < mat.rows; ++x) { - for (std::size_t y = 0; y < mat.cols; ++y) { - arr[x * mat.cols + y] = @(mat.at(x, y)); - } - } - return arr; -} \ No newline at end of file +@end \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index de121b14..1dc9fb7b 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -2,6 +2,7 @@ #import #import "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" +#import "../../utils/Conversions.h" #import "opencv2/opencv.hpp" #import "Constants.h" @@ -64,7 +65,7 @@ void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, int col = pixel % originalSize.width; std::vector scores; scores.reserve(numLabels); - for (const cv::Mat& mat : labelScores) { + for (const auto& mat : labelScores) { scores.push_back(mat.at(row, col)); } @@ -114,12 +115,12 @@ - (NSDictionary *)postprocess:(NSArray *)output for (std::size_t label = 0; label < numLabels; ++label) { if (labelSet.contains(deeplabv3_resnet50_labels[label])){ NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); - NSArray *arr = matToNSArray(resizedLabelScores[label]); + NSArray *arr = simpleMatToNSArray(resizedLabelScores[label]); result[labelString] = arr; } } - result[@"argmax"] = matToNSArray(maxArg); + result[@"argmax"] = simpleMatToNSArray(maxArg); return result; } diff --git a/ios/RnExecutorch/utils/Conversions.h b/ios/RnExecutorch/utils/Conversions.h new file mode 100644 index 00000000..a83ec5fb --- /dev/null +++ b/ios/RnExecutorch/utils/Conversions.h @@ -0,0 +1,15 @@ +#import "opencv2/opencv.hpp" + +// Convert a matrix containing a single value per cell to a NSArray +template +NSArray* simpleMatToNSArray(const cv::Mat& mat) { + std::size_t numPixels = mat.rows * mat.cols; + NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels]; + + for (std::size_t x = 0; x < mat.rows; ++x) { + for (std::size_t y = 0; y < mat.cols; ++y) { + arr[x * mat.cols + y] = @(mat.at(x, y)); + } + } + return arr; +} diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 5b583e5e..7b650d30 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -1,7 +1,7 @@ -import { useState, useEffect } from 'react'; +import { useState } from 'react'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; -import { fetchResource } from '../../utils/fetchResource'; import { ETError, getError } from '../../Error'; +import { useModule } from '../useModule'; interface Props { modelSource: string | number; @@ -20,27 +20,11 @@ export const useImageSegmentation = ({ ) => Promise<{ [category: string]: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); - const [error, setError] = useState(null); - const [isReady, setIsReady] = useState(false); - const [downloadProgress, setDownloadProgress] = useState(0); const [isGenerating, setIsGenerating] = useState(false); - - useEffect(() => { - const loadModel = async () => { - if (!modelSource) return; - - try { - setIsReady(false); - const fileUri = await fetchResource(modelSource, setDownloadProgress); - await module.loadModule(fileUri); - setIsReady(true); - } catch (e) { - setError(getError(e)); - } - }; - - loadModel(); - }, [modelSource, module]); + const { error, isReady, downloadProgress } = useModule({ + modelSource, + module, + }); const forward = async (input: string, classesOfInterest?: string[]) => { if (!isReady) { diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index 041894c1..f6646feb 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -6,7 +6,6 @@ export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); static async forward(input: string, classesOfInteres?: string[]) { - console.log('# classes: ', classesOfInteres?.length); try { return await (this.module.forward( input, From 6da0ced2e78c2690bb5d65c4d8ba59309957687c Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Mon, 10 Mar 2025 11:34:30 +0100 Subject: [PATCH 7/9] Add label enum to segmentation I/O --- .../ImageSegmentationModel.mm | 23 ++++++++---------- .../image_segmentation/image_segmentation.ts | 23 ------------------ .../computer_vision/useImageSegmentation.ts | 23 ++++++++++++++---- src/index.tsx | 1 + .../ImageSegmentationModule.ts | 9 ++++--- src/types/image_segmentation.ts | 24 +++++++++++++++++++ 6 files changed, 57 insertions(+), 46 deletions(-) delete mode 100644 src/constants/image_segmentation/image_segmentation.ts create mode 100644 src/types/image_segmentation.ts diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 1dc9fb7b..7aa10a9f 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -1,6 +1,8 @@ #import "ImageSegmentationModel.h" #import -#import "../../utils/ImageProcessor.h" +#import +#import +#i\port "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" #import "../../utils/Conversions.h" #import "opencv2/opencv.hpp" @@ -57,7 +59,7 @@ - (NSArray *)preprocess:(cv::Mat &)input { return resizedLabelScores; } -void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, +void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& argMax, cv::Size originalSize, std::size_t numLabels) { std::size_t numOriginalPixels = originalSize.height * originalSize.width; for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { @@ -71,17 +73,12 @@ void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, std::vector adjustedScores = softmax(scores); - std::size_t maxArgIndex = 0; - double maxArgVal = 0; for (std::size_t label = 0; label < numLabels; ++label) { labelScores[label].at(row, col) = adjustedScores[label]; - if (adjustedScores[label] > maxArgVal) { - maxArgIndex = label; - maxArgVal = adjustedScores[label]; - } } - maxArg.at(row, col) = maxArgIndex; + auto maxIt = std::max_element(scores.begin(), scores.end()); + argMax.at(row, col) = std::distance(scores.begin(), maxIt); } } @@ -98,10 +95,10 @@ - (NSDictionary *)postprocess:(NSArray *)output std::vector resizedLabelScores = rescaleResults(output, numLabels, modelImageSize, originalSize); - cv::Mat maxArg = cv::Mat(originalSize, CV_32S); + cv::Mat argMax = cv::Mat(originalSize, CV_32S); - // For each pixel apply softmax across all the labels and calculate the maxArg - adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels); + // For each pixel apply softmax across all the labels and calculate the argMax + adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels); std::unordered_set labelSet; @@ -120,7 +117,7 @@ - (NSDictionary *)postprocess:(NSArray *)output } } - result[@"argmax"] = simpleMatToNSArray(maxArg); + result[@"argmax"] = simpleMatToNSArray(argMax); return result; } diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts deleted file mode 100644 index b2f15f44..00000000 --- a/src/constants/image_segmentation/image_segmentation.ts +++ /dev/null @@ -1,23 +0,0 @@ -export const classLabels = new Map([ - [0, 'background'], - [1, 'aeroplane'], - [2, 'bicycle'], - [3, 'bird'], - [4, 'boat'], - [5, 'bottle'], - [6, 'bus'], - [7, 'car'], - [8, 'cat'], - [9, 'chair'], - [10, 'cow'], - [11, 'diningtable'], - [12, 'dog'], - [13, 'horse'], - [14, 'motorbike'], - [15, 'person'], - [16, 'pottedplant'], - [17, 'sheep'], - [18, 'sofa'], - [19, 'train'], - [20, 'tvmonitor'], -]); diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 7b650d30..43bd98f1 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -2,6 +2,7 @@ import { useState } from 'react'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; import { ETError, getError } from '../../Error'; import { useModule } from '../useModule'; +import { DeeplabLabel } from '../../types/image_segmentation'; interface Props { modelSource: string | number; @@ -16,8 +17,8 @@ export const useImageSegmentation = ({ downloadProgress: number; forward: ( input: string, - classesOfInterest?: string[] - ) => Promise<{ [category: string]: number[] }>; + classesOfInterest?: DeeplabLabel[] + ) => Promise<{ [key in DeeplabLabel]?: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); const [isGenerating, setIsGenerating] = useState(false); @@ -26,7 +27,7 @@ export const useImageSegmentation = ({ module, }); - const forward = async (input: string, classesOfInterest?: string[]) => { + const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => { if (!isReady) { throw new Error(getError(ETError.ModuleNotLoaded)); } @@ -36,8 +37,20 @@ export const useImageSegmentation = ({ try { setIsGenerating(true); - const output = await module.forward(input, classesOfInterest || []); - return output; + const stringDict = await module.forward( + input, + (classesOfInterest || []).map((label) => DeeplabLabel[label]) + ); + + let enumDict: { [key in DeeplabLabel]?: number[] } = {}; + + for (const key in stringDict) { + if (key in DeeplabLabel) { + const enumKey = DeeplabLabel[key as keyof typeof DeeplabLabel]; + enumDict[enumKey] = stringDict[key]; + } + } + return enumDict; } catch (e) { throw new Error(getError(e)); } finally { diff --git a/src/index.tsx b/src/index.tsx index 8d322696..c4ae2f55 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -30,6 +30,7 @@ export * from './utils/listDownloadedResources'; // types export * from './types/object_detection'; export * from './types/ocr'; +export * from './types/image_segmentation'; // constants export * from './constants/modelUrls'; diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index f6646feb..f2a6a167 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -5,12 +5,11 @@ import { getError } from '../../Error'; export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); - static async forward(input: string, classesOfInteres?: string[]) { + static async forward(input: string, classesOfInterest: string[]) { try { - return await (this.module.forward( - input, - classesOfInteres || [] - ) as ReturnType<_ImageSegmentationModule['forward']>); + return await (this.module.forward(input, classesOfInterest) as ReturnType< + _ImageSegmentationModule['forward'] + >); } catch (e) { throw new Error(getError(e)); } diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts new file mode 100644 index 00000000..7d03d517 --- /dev/null +++ b/src/types/image_segmentation.ts @@ -0,0 +1,24 @@ +export enum DeeplabLabel { + background, + aeroplane, + bicycle, + bird, + boat, + bottle, + bus, + car, + cat, + chair, + cow, + diningtable, + dog, + horse, + motorbike, + person, + pottedplant, + sheep, + sofa, + train, + tvmonitor, + argmax, // Additional label not present in the model +} From d8d36df7c93bc1e78530816b381ccead2616872e Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Mon, 10 Mar 2025 12:30:04 +0100 Subject: [PATCH 8/9] Add optionality for segmentation output resize --- ios/RnExecutorch/ImageSegmentation.mm | 9 ++-- .../ImageSegmentationModel.h | 3 +- .../ImageSegmentationModel.mm | 46 ++++++++++++------- .../computer_vision/useImageSegmentation.ts | 12 +++-- .../ImageSegmentationModule.ts | 14 ++++-- src/native/NativeImageSegmentation.ts | 3 +- src/native/RnExecutorchModules.ts | 5 +- 7 files changed, 61 insertions(+), 31 deletions(-) diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm index cf9366d5..19cbe664 100644 --- a/ios/RnExecutorch/ImageSegmentation.mm +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -35,13 +35,16 @@ - (void)loadModule:(NSString *)modelSource - (void)forward:(NSString *)input classesOfInterest:(NSArray *)classesOfInterest + resize:(BOOL)resize resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { + reject:(RCTPromiseRejectBlock)reject { @try { cv::Mat image = [ImageProcessor readImage:input]; - NSDictionary *result = [model runModel:image returnClasses:classesOfInterest]; - + NSDictionary *result = [model runModel:image + returnClasses:classesOfInterest + resize:resize]; + resolve(result); return; } @catch (NSException *exception) { diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 40b39583..a58733a1 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -4,6 +4,7 @@ @interface ImageSegmentationModel : BaseModel - (cv::Size)getModelImageSize; - (NSDictionary *)runModel:(cv::Mat &)input - returnClasses:(NSArray *)classesOfInterest; + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize; @end \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 7aa10a9f..70638bd4 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -2,7 +2,7 @@ #import #import #import -#i\port "../../utils/ImageProcessor.h" +#import "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" #import "../../utils/Conversions.h" #import "opencv2/opencv.hpp" @@ -11,7 +11,8 @@ @interface ImageSegmentationModel () - (NSArray *)preprocess:(cv::Mat &)input; - (NSDictionary *)postprocess:(NSArray *)output - returnClasses:(NSArray *)classesOfInterest; + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize; @end @implementation ImageSegmentationModel { @@ -40,8 +41,8 @@ - (NSArray *)preprocess:(cv::Mat &)input { return modelInput; } -std::vector rescaleResults(NSArray *result, std::size_t numLabels, - cv::Size modelImageSize, cv::Size originalSize) { +std::vector extractResults(NSArray *result, std::size_t numLabels, + cv::Size modelImageSize, cv::Size originalSize, BOOL resize) { std::size_t numModelPixels = modelImageSize.height * modelImageSize.width; std::vector resizedLabelScores(numLabels); @@ -54,17 +55,22 @@ - (NSArray *)preprocess:(cv::Mat &)input { labelMat.at(row, col) = [result[label * numModelPixels + pixel] doubleValue]; } - cv::resize(labelMat, resizedLabelScores[label], originalSize); + if (resize) { + cv::resize(labelMat, resizedLabelScores[label], originalSize); + } + else { + resizedLabelScores[label] = std::move(labelMat); + } } return resizedLabelScores; } void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& argMax, - cv::Size originalSize, std::size_t numLabels) { - std::size_t numOriginalPixels = originalSize.height * originalSize.width; - for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { - int row = pixel / originalSize.width; - int col = pixel % originalSize.width; + cv::Size outputSize, std::size_t numLabels) { + std::size_t numOutputPixels = outputSize.height * outputSize.width; + for (std::size_t pixel = 0; pixel < numOutputPixels; ++pixel) { + int row = pixel / outputSize.width; + int col = pixel % outputSize.width; std::vector scores; scores.reserve(numLabels); for (const auto& mat : labelScores) { @@ -83,7 +89,8 @@ void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& argMax, } - (NSDictionary *)postprocess:(NSArray *)output - returnClasses:(NSArray *)classesOfInterest{ + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize { cv::Size modelImageSize = [self getModelImageSize]; std::size_t numLabels = deeplabv3_resnet50_labels.size(); @@ -91,14 +98,16 @@ - (NSDictionary *)postprocess:(NSArray *)output NSAssert((std::size_t)output.count == numLabels * modelImageSize.height * modelImageSize.width, @"Model generated unexpected output size."); - // For each label extract it's matrix and rescale it to the original size + // For each label extract it's matrix, + // and rescale it to the original size if `resize` std::vector resizedLabelScores = - rescaleResults(output, numLabels, modelImageSize, originalSize); + extractResults(output, numLabels, modelImageSize, originalSize, resize); - cv::Mat argMax = cv::Mat(originalSize, CV_32S); + cv::Size outputSize = resize ? originalSize : modelImageSize; + cv::Mat argMax = cv::Mat(outputSize, CV_32S); // For each pixel apply softmax across all the labels and calculate the argMax - adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels); + adjustScoresPerPixel(resizedLabelScores, argMax, outputSize, numLabels); std::unordered_set labelSet; @@ -123,11 +132,14 @@ - (NSDictionary *)postprocess:(NSArray *)output } - (NSDictionary *)runModel:(cv::Mat &)input - returnClasses:(NSArray *)classesOfInterest { + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize { NSArray *modelInput = [self preprocess:input]; NSArray *result = [self forward:modelInput]; - NSDictionary *output = [self postprocess:result[0] returnClasses:classesOfInterest]; + NSDictionary *output = [self postprocess:result[0] + returnClasses:classesOfInterest + resize:resize]; return output; } diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 43bd98f1..4e562d6b 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -17,7 +17,8 @@ export const useImageSegmentation = ({ downloadProgress: number; forward: ( input: string, - classesOfInterest?: DeeplabLabel[] + classesOfInterest?: DeeplabLabel[], + resize?: boolean ) => Promise<{ [key in DeeplabLabel]?: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); @@ -27,7 +28,11 @@ export const useImageSegmentation = ({ module, }); - const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => { + const forward = async ( + input: string, + classesOfInterest?: DeeplabLabel[], + resize?: boolean + ) => { if (!isReady) { throw new Error(getError(ETError.ModuleNotLoaded)); } @@ -39,7 +44,8 @@ export const useImageSegmentation = ({ setIsGenerating(true); const stringDict = await module.forward( input, - (classesOfInterest || []).map((label) => DeeplabLabel[label]) + (classesOfInterest || []).map((label) => DeeplabLabel[label]), + resize || false ); let enumDict: { [key in DeeplabLabel]?: number[] } = {}; diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index f2a6a167..1d078c1c 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -5,11 +5,17 @@ import { getError } from '../../Error'; export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); - static async forward(input: string, classesOfInterest: string[]) { + static async forward( + input: string, + classesOfInterest: string[], + resize: boolean + ) { try { - return await (this.module.forward(input, classesOfInterest) as ReturnType< - _ImageSegmentationModule['forward'] - >); + return await (this.module.forward( + input, + classesOfInterest, + resize + ) as ReturnType<_ImageSegmentationModule['forward']>); } catch (e) { throw new Error(getError(e)); } diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts index ccff2731..c66c8743 100644 --- a/src/native/NativeImageSegmentation.ts +++ b/src/native/NativeImageSegmentation.ts @@ -6,7 +6,8 @@ export interface Spec extends TurboModule { forward( input: string, - classesOfInterest: string[] + classesOfInterest: string[], + resize: boolean ): Promise<{ [category: string]: number[] }>; } diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts index eb6e7087..62ebd309 100644 --- a/src/native/RnExecutorchModules.ts +++ b/src/native/RnExecutorchModules.ts @@ -133,9 +133,10 @@ const VerticalOCR = VerticalOCRSpec class _ImageSegmentationModule { async forward( input: string, - classesOfInteres: string[] + classesOfInteres: string[], + resize: boolean ): ReturnType { - return await ImageSegmentation.forward(input, classesOfInteres); + return await ImageSegmentation.forward(input, classesOfInteres, resize); } async loadModule( modelSource: string | number From fbd1c85b994aa7cdba2381ecc67134fd8d2cf3fd Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 12 Mar 2025 15:12:41 +0100 Subject: [PATCH 9/9] Change segmentation enum values to upper case --- .../models/image_segmentation/Constants.mm | 8 ++-- .../ImageSegmentationModel.mm | 2 +- src/types/image_segmentation.ts | 44 +++++++++---------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.mm b/ios/RnExecutorch/models/image_segmentation/Constants.mm index 4d98f34d..84ce9ea6 100644 --- a/ios/RnExecutorch/models/image_segmentation/Constants.mm +++ b/ios/RnExecutorch/models/image_segmentation/Constants.mm @@ -3,8 +3,8 @@ #import const std::vector deeplabv3_resnet50_labels = { - "background", "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", - "dog", "horse", "motorbike", "person", "pottedplant", "sheep", - "sofa", "train", "tvmonitor" + "BACKGROUND", "AEROPLANE", "BICYCLE", "BIRD", "BOAT", + "BOTTLE", "BUS", "CAR", "CAT", "CHAIR", "COW", "DININGTABLE", + "DOG", "HORSE", "MOTORBIKE", "PERSON", "POTTEDPLANT", "SHEEP", + "SOFA", "TRAIN", "TVMONITOR" }; \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 70638bd4..951687c5 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -126,7 +126,7 @@ - (NSDictionary *)postprocess:(NSArray *)output } } - result[@"argmax"] = simpleMatToNSArray(argMax); + result[@"ARGMAX"] = simpleMatToNSArray(argMax); return result; } diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts index 7d03d517..bc7d254d 100644 --- a/src/types/image_segmentation.ts +++ b/src/types/image_segmentation.ts @@ -1,24 +1,24 @@ export enum DeeplabLabel { - background, - aeroplane, - bicycle, - bird, - boat, - bottle, - bus, - car, - cat, - chair, - cow, - diningtable, - dog, - horse, - motorbike, - person, - pottedplant, - sheep, - sofa, - train, - tvmonitor, - argmax, // Additional label not present in the model + BACKGROUND, + AEROPLANE, + BICYCLE, + BIRD, + BOAT, + BOTTLE, + BUS, + CAR, + CAT, + CHAIR, + COW, + DININGTABLE, + DOG, + HORSE, + MOTORBIKE, + PERSON, + POTTEDPLANT, + SHEEP, + SOFA, + TRAIN, + TVMONITOR, + ARGMAX, // Additional label not present in the model }