From 8741c3c9a120a1276247ed63c4ae80ad1a2aee8a Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Thu, 27 Feb 2025 11:19:23 +0100 Subject: [PATCH 01/16] Add scaffolding for image segmentation native code --- ios/RnExecutorch/ImageSegmentation.h | 5 ++ ios/RnExecutorch/ImageSegmentation.mm | 59 +++++++++++++++++++ ios/RnExecutorch/StyleTransfer.mm | 2 +- .../ImageSegmentationModel.h | 5 ++ .../ImageSegmentationModel.mm | 6 ++ .../{ => style_transfer}/StyleTransferModel.h | 2 +- .../StyleTransferModel.mm | 2 +- .../computer_vision/useImageSegmentation.ts | 31 ++++++++++ src/index.tsx | 2 + src/modules/BaseModule.ts | 2 + .../ImageSegmentationModule.ts | 12 ++++ src/native/NativeImageSegmentation.ts | 10 ++++ src/native/RnExecutorchModules.ts | 29 +++++++++ 13 files changed, 164 insertions(+), 3 deletions(-) create mode 100644 ios/RnExecutorch/ImageSegmentation.h create mode 100644 ios/RnExecutorch/ImageSegmentation.mm create mode 100644 ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h create mode 100644 ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm rename ios/RnExecutorch/models/{ => style_transfer}/StyleTransferModel.h (90%) rename ios/RnExecutorch/models/{ => style_transfer}/StyleTransferModel.mm (97%) create mode 100644 src/hooks/computer_vision/useImageSegmentation.ts create mode 100644 src/modules/computer_vision/ImageSegmentationModule.ts create mode 100644 src/native/NativeImageSegmentation.ts diff --git a/ios/RnExecutorch/ImageSegmentation.h b/ios/RnExecutorch/ImageSegmentation.h new file mode 100644 index 00000000..59ed56a4 --- /dev/null +++ b/ios/RnExecutorch/ImageSegmentation.h @@ -0,0 +1,5 @@ +#import + +@interface ImageSegmentation : NSObject + +@end \ No newline at end of file diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm new file mode 100644 index 00000000..ef526087 --- /dev/null +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -0,0 +1,59 @@ +#import "ImageSegmentation.h" +#import "models/image_segmentation/ImageSegmentationModel.h" +#import "models/BaseModel.h" +#import "utils/ETError.h" +#import +#import + +@implementation ImageSegmentation { + ImageSegmentationModel *model; +} + +RCT_EXPORT_MODULE() + +- (void)loadModule:(NSString *)modelSource + resolve:(RCTPromiseResolveBlock)resolve + reject:(RCTPromiseRejectBlock)reject { + + NSLog(@"Segmentation: loadModule"); + model = [[ImageSegmentationModel alloc] init]; + [model + loadModel:[NSURL URLWithString:modelSource] + completion:^(BOOL success, NSNumber *errorCode) { + if (success) { + resolve(errorCode); + return; + } + + reject(@"init_module_error", + [NSString stringWithFormat:@"%ld", (long)[errorCode longValue]], + nil); + return; + }]; +} + +- (void)forward:(NSString *)input + resolve:(RCTPromiseResolveBlock)resolve + reject:(RCTPromiseRejectBlock)reject { + NSLog(@"Segmentation: forward"); +// @try { +// cv::Mat image = [ImageProcessor readImage:input]; +// cv::Mat resultImage = [model runModel:image]; + +// NSString *tempFilePath = [ImageProcessor saveToTempFile:resultImage]; +// resolve(tempFilePath); +// return; +// } @catch (NSException *exception) { +// NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason); +// reject(@"forward_error", +// [NSString stringWithFormat:@"%@", exception.reason], nil); +// return; +// } +} + +- (std::shared_ptr)getTurboModule: + (const facebook::react::ObjCTurboModule::InitParams &)params { + return std::make_shared(params); +} + +@end diff --git a/ios/RnExecutorch/StyleTransfer.mm b/ios/RnExecutorch/StyleTransfer.mm index 08e8d4a3..52930cd4 100644 --- a/ios/RnExecutorch/StyleTransfer.mm +++ b/ios/RnExecutorch/StyleTransfer.mm @@ -1,7 +1,7 @@ #import "StyleTransfer.h" #import "ImageProcessor.h" #import "models/BaseModel.h" -#import "models/StyleTransferModel.h" +#import "models/style_transfer/StyleTransferModel.h" #import "utils/ETError.h" #import #import diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h new file mode 100644 index 00000000..0e02a94c --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -0,0 +1,5 @@ +#import "../BaseModel.h" + +@interface ImageSegmentationModel : BaseModel + +@end diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm new file mode 100644 index 00000000..8212bdee --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -0,0 +1,6 @@ +#import "ImageSegmentationModel.h" + +@implementation ImageSegmentationModel { +} + +@end diff --git a/ios/RnExecutorch/models/StyleTransferModel.h b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h similarity index 90% rename from ios/RnExecutorch/models/StyleTransferModel.h rename to ios/RnExecutorch/models/style_transfer/StyleTransferModel.h index 1fd91d7b..20cdf6dd 100644 --- a/ios/RnExecutorch/models/StyleTransferModel.h +++ b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.h @@ -1,4 +1,4 @@ -#import "BaseModel.h" +#import "../BaseModel.h" #import "opencv2/opencv.hpp" @interface StyleTransferModel : BaseModel diff --git a/ios/RnExecutorch/models/StyleTransferModel.mm b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm similarity index 97% rename from ios/RnExecutorch/models/StyleTransferModel.mm rename to ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm index 6051e24b..6a351431 100644 --- a/ios/RnExecutorch/models/StyleTransferModel.mm +++ b/ios/RnExecutorch/models/style_transfer/StyleTransferModel.mm @@ -1,5 +1,5 @@ #import "StyleTransferModel.h" -#import "../utils/ImageProcessor.h" +#import "../../utils/ImageProcessor.h" #import "opencv2/opencv.hpp" @implementation StyleTransferModel { diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts new file mode 100644 index 00000000..2134b624 --- /dev/null +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -0,0 +1,31 @@ +import { useState } from 'react'; +import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; +import { useModule } from '../useModule'; + +interface Props { + modelSource: string | number; +} + +export const useImageSegmentation = ({ + modelSource, +}: Props): { + error: string | null; + isReady: boolean; + isGenerating: boolean; + downloadProgress: number; + forward: (input: string) => Promise; +} => { + const [module, _] = useState(() => new _ImageSegmentationModule()); + const { + error, + isReady, + isGenerating, + downloadProgress, + forwardImage: forward, + } = useModule({ + modelSource, + module, + }); + + return { error, isReady, isGenerating, downloadProgress, forward }; +}; diff --git a/src/index.tsx b/src/index.tsx index 7ae7a7ad..8d322696 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -2,6 +2,7 @@ export * from './hooks/computer_vision/useClassification'; export * from './hooks/computer_vision/useObjectDetection'; export * from './hooks/computer_vision/useStyleTransfer'; +export * from './hooks/computer_vision/useImageSegmentation'; export * from './hooks/computer_vision/useOCR'; export * from './hooks/computer_vision/useVerticalOCR'; @@ -14,6 +15,7 @@ export * from './hooks/general/useExecutorchModule'; export * from './modules/computer_vision/ClassificationModule'; export * from './modules/computer_vision/ObjectDetectionModule'; export * from './modules/computer_vision/StyleTransferModule'; +export * from './modules/computer_vision/ImageSegmentationModule'; export * from './modules/computer_vision/OCRModule'; export * from './modules/computer_vision/VerticalOCRModule'; diff --git a/src/modules/BaseModule.ts b/src/modules/BaseModule.ts index e977836f..56cf2e3d 100644 --- a/src/modules/BaseModule.ts +++ b/src/modules/BaseModule.ts @@ -1,4 +1,5 @@ import { + _ImageSegmentationModule, _StyleTransferModule, _ObjectDetectionModule, _ClassificationModule, @@ -10,6 +11,7 @@ import { getError } from '../Error'; export class BaseModule { static module: + | _ImageSegmentationModule | _StyleTransferModule | _ObjectDetectionModule | _ClassificationModule diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts new file mode 100644 index 00000000..eae124a8 --- /dev/null +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -0,0 +1,12 @@ +import { BaseCVModule } from './BaseCVModule'; +import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; + +export class ImageSegmentationModule extends BaseCVModule { + static module = new _ImageSegmentationModule(); + + static async forward(input: string) { + return await (super.forward(input) as ReturnType< + _ImageSegmentationModule['forward'] + >); + } +} diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts new file mode 100644 index 00000000..1dcc9c27 --- /dev/null +++ b/src/native/NativeImageSegmentation.ts @@ -0,0 +1,10 @@ +import type { TurboModule } from 'react-native'; +import { TurboModuleRegistry } from 'react-native'; + +export interface Spec extends TurboModule { + loadModule(modelSource: string): Promise; + + forward(input: string): Promise; +} + +export default TurboModuleRegistry.get('ImageSegmentation'); diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts index b1edcf52..c48c08a3 100644 --- a/src/native/RnExecutorchModules.ts +++ b/src/native/RnExecutorchModules.ts @@ -2,6 +2,7 @@ import { Platform } from 'react-native'; import { Spec as ClassificationInterface } from './NativeClassification'; import { Spec as ObjectDetectionInterface } from './NativeObjectDetection'; import { Spec as StyleTransferInterface } from './NativeStyleTransfer'; +import { Spec as ImageSegmentationInterface } from './NativeImageSegmentation'; import { Spec as ETModuleInterface } from './NativeETModule'; import { Spec as OCRInterface } from './NativeOCR'; import { Spec as VerticalOCRInterface } from './NativeVerticalOCR'; @@ -51,6 +52,19 @@ const Classification = ClassificationSpec } ); +const ImageSegmentationSpec = require('./NativeImageSegmentation').default; + +const ImageSegmentation = ImageSegmentationSpec + ? ImageSegmentationSpec + : new Proxy( + {}, + { + get() { + throw new Error(LINKING_ERROR); + }, + } + ); + const ObjectDetectionSpec = require('./NativeObjectDetection').default; const ObjectDetection = ObjectDetectionSpec @@ -116,6 +130,19 @@ const VerticalOCR = VerticalOCRSpec } ); +class _ImageSegmentationModule { + async forward( + input: string + ): ReturnType { + return await ImageSegmentation.forward(input); + } + async loadModule( + modelSource: string | number + ): ReturnType { + return await ImageSegmentation.loadModule(modelSource); + } +} + class _ObjectDetectionModule { async forward( input: string @@ -239,12 +266,14 @@ export { Classification, ObjectDetection, StyleTransfer, + ImageSegmentation, SpeechToText, OCR, VerticalOCR, _ETModule, _ClassificationModule, _StyleTransferModule, + _ImageSegmentationModule, _ObjectDetectionModule, _SpeechToTextModule, _OCRModule, From f5e6e5a12705acbd347b21a95c6beb9834d40faa Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Tue, 4 Mar 2025 11:12:55 +0100 Subject: [PATCH 02/16] Add working ios-native model --- ios/RnExecutorch/ImageSegmentation.mm | 30 +++--- .../classification/ClassificationModel.mm | 2 +- .../models/image_segmentation/Constants.h | 5 + .../models/image_segmentation/Constants.mm | 10 ++ .../ImageSegmentationModel.h | 3 + .../ImageSegmentationModel.mm | 102 ++++++++++++++++++ .../Utils.h => utils/Numerical.h} | 0 .../Utils.mm => utils/Numerical.mm} | 0 .../computer_vision/useImageSegmentation.ts | 2 +- src/modules/computer_vision/BaseCVModule.ts | 2 + src/native/NativeImageSegmentation.ts | 2 +- 11 files changed, 140 insertions(+), 18 deletions(-) create mode 100644 ios/RnExecutorch/models/image_segmentation/Constants.h create mode 100644 ios/RnExecutorch/models/image_segmentation/Constants.mm rename ios/RnExecutorch/{models/classification/Utils.h => utils/Numerical.h} (100%) rename ios/RnExecutorch/{models/classification/Utils.mm => utils/Numerical.mm} (100%) diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm index ef526087..df972ab2 100644 --- a/ios/RnExecutorch/ImageSegmentation.mm +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -4,6 +4,8 @@ #import "utils/ETError.h" #import #import +#import +#import "ImageProcessor.h" @implementation ImageSegmentation { ImageSegmentationModel *model; @@ -15,7 +17,6 @@ - (void)loadModule:(NSString *)modelSource resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - NSLog(@"Segmentation: loadModule"); model = [[ImageSegmentationModel alloc] init]; [model loadModel:[NSURL URLWithString:modelSource] @@ -35,20 +36,19 @@ - (void)loadModule:(NSString *)modelSource - (void)forward:(NSString *)input resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { - NSLog(@"Segmentation: forward"); -// @try { -// cv::Mat image = [ImageProcessor readImage:input]; -// cv::Mat resultImage = [model runModel:image]; - -// NSString *tempFilePath = [ImageProcessor saveToTempFile:resultImage]; -// resolve(tempFilePath); -// return; -// } @catch (NSException *exception) { -// NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason); -// reject(@"forward_error", -// [NSString stringWithFormat:@"%@", exception.reason], nil); -// return; -// } + + @try { + cv::Mat image = [ImageProcessor readImage:input]; + NSDictionary *result= [model runModel:image]; + + resolve(result); + return; + } @catch (NSException *exception) { + NSLog(@"An exception occurred: %@, %@", exception.name, exception.reason); + reject(@"forward_error", + [NSString stringWithFormat:@"%@", exception.reason], nil); + return; + } } - (std::shared_ptr)getTurboModule: diff --git a/ios/RnExecutorch/models/classification/ClassificationModel.mm b/ios/RnExecutorch/models/classification/ClassificationModel.mm index 8e7973e2..0306e67c 100644 --- a/ios/RnExecutorch/models/classification/ClassificationModel.mm +++ b/ios/RnExecutorch/models/classification/ClassificationModel.mm @@ -1,7 +1,7 @@ #import "ClassificationModel.h" #import "../../utils/ImageProcessor.h" +#import "../../utils/Numerical.h" #import "Constants.h" -#import "Utils.h" #import "opencv2/opencv.hpp" @implementation ClassificationModel diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.h b/ios/RnExecutorch/models/image_segmentation/Constants.h new file mode 100644 index 00000000..889556d7 --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/Constants.h @@ -0,0 +1,5 @@ +#import +#import + + +extern const std::vector deeplabv3_resnet50_labels; diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.mm b/ios/RnExecutorch/models/image_segmentation/Constants.mm new file mode 100644 index 00000000..4d98f34d --- /dev/null +++ b/ios/RnExecutorch/models/image_segmentation/Constants.mm @@ -0,0 +1,10 @@ +#import "Constants.h" +#import +#import + +const std::vector deeplabv3_resnet50_labels = { + "background", "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", + "dog", "horse", "motorbike", "person", "pottedplant", "sheep", + "sofa", "train", "tvmonitor" +}; \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 0e02a94c..66dfb4b6 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -1,5 +1,8 @@ #import "../BaseModel.h" +#import "opencv2/opencv.hpp" @interface ImageSegmentationModel : BaseModel +- (cv::Size)getModelImageSize; +- (NSDictionary *)runModel:(cv::Mat &)input; @end diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 8212bdee..f504cf12 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -1,6 +1,108 @@ #import "ImageSegmentationModel.h" +#import "../../utils/ImageProcessor.h" +#import "../../utils/Numerical.h" +#import "opencv2/opencv.hpp" +#import "Constants.h" + +@interface ImageSegmentationModel () + - (NSArray *)preprocess:(cv::Mat &)input; + - (NSDictionary *)postprocess:(NSArray *)output; +@end @implementation ImageSegmentationModel { + cv::Size originalSize; +} + +- (cv::Size)getModelImageSize { + NSArray *inputShape = [module getInputShape:@0]; + NSNumber *widthNumber = inputShape.lastObject; + NSNumber *heightNumber = inputShape[inputShape.count - 2]; + + int height = [heightNumber intValue]; + int width = [widthNumber intValue]; + + return cv::Size(height, width); +} + +- (NSArray *)preprocess:(cv::Mat &)input { + originalSize = cv::Size(input.cols, input.rows); + + cv::Size modelImageSize = [self getModelImageSize]; + cv::Mat output; + cv::resize(input, output, modelImageSize); + + NSArray *modelInput = [ImageProcessor matToNSArray:output]; + return modelInput; +} + +- (NSDictionary *)postprocess:(NSArray *)output { + cv::Size modelImageSize = [self getModelImageSize]; + + std::size_t numLabels = deeplabv3_resnet50_labels.size(); + std::size_t numModelPixels = modelImageSize.height * modelImageSize.width; + std::size_t numOriginalPixels = originalSize.height * originalSize.width; + std::size_t outputSize = (std::size_t)output.count; + + NSAssert(outputSize == numLabels * numModelPixels, + @"Model generated unexpected output size."); + + + // For each label extract it's matrix and rescale it to the original size + std::vector resizedLabelScores(numLabels); + for (std::size_t label = 0; label < numLabels; ++label) { + cv::Mat labelMat = cv::Mat(modelImageSize, CV_64F); + + for(std::size_t pixel = 0; pixel < numModelPixels; ++pixel){ + int row = pixel / modelImageSize.width; + int col = pixel % modelImageSize.width; + labelMat.at(row, col) = [output[label * numModelPixels + pixel] doubleValue]; + } + + cv::resize(labelMat, resizedLabelScores[label], originalSize); + } + + // For each pixel apply softmax across all the labels + for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { + int row = pixel / originalSize.width; + int col = pixel % originalSize.width; + std::vector scores; + scores.reserve(numLabels); + for (const cv::Mat& mat : resizedLabelScores) { + scores.push_back(mat.at(row, col)); + } + + std::vector adjustedScores = softmax(scores); + + for (std::size_t label = 0; label < numLabels; ++label) { + resizedLabelScores[label].at(row, col) = adjustedScores[label]; + } + } + + NSMutableDictionary *result = [NSMutableDictionary dictionary]; + + for (std::size_t label = 0; label < numLabels; ++label) { + NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); + NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numOriginalPixels]; + + for (std::size_t x = 0; x < originalSize.height; ++x) { + for (std::size_t y = 0; y < originalSize.width; ++y) { + arr[x * originalSize.width + y] = @(resizedLabelScores[label].at(x, y)); + } + } + + result[labelString] = arr; + } + + return result; +} + +- (NSDictionary *)runModel:(cv::Mat &)input { + NSArray *modelInput = [self preprocess:input]; + NSArray *result = [self forward:modelInput]; + + NSDictionary *output = [self postprocess:result[0]]; + + return output; } @end diff --git a/ios/RnExecutorch/models/classification/Utils.h b/ios/RnExecutorch/utils/Numerical.h similarity index 100% rename from ios/RnExecutorch/models/classification/Utils.h rename to ios/RnExecutorch/utils/Numerical.h diff --git a/ios/RnExecutorch/models/classification/Utils.mm b/ios/RnExecutorch/utils/Numerical.mm similarity index 100% rename from ios/RnExecutorch/models/classification/Utils.mm rename to ios/RnExecutorch/utils/Numerical.mm diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 2134b624..746a45e0 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -13,7 +13,7 @@ export const useImageSegmentation = ({ isReady: boolean; isGenerating: boolean; downloadProgress: number; - forward: (input: string) => Promise; + forward: (input: string) => Promise<{ [category: string]: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); const { diff --git a/src/modules/computer_vision/BaseCVModule.ts b/src/modules/computer_vision/BaseCVModule.ts index c61987d3..2702a6f9 100644 --- a/src/modules/computer_vision/BaseCVModule.ts +++ b/src/modules/computer_vision/BaseCVModule.ts @@ -3,6 +3,7 @@ import { _StyleTransferModule, _ObjectDetectionModule, _ClassificationModule, + _ImageSegmentationModule, } from '../../native/RnExecutorchModules'; import { getError } from '../../Error'; @@ -10,6 +11,7 @@ export class BaseCVModule extends BaseModule { static module: | _StyleTransferModule | _ObjectDetectionModule + | _ImageSegmentationModule | _ClassificationModule; static async forward(input: string) { diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts index 1dcc9c27..c65b4cb7 100644 --- a/src/native/NativeImageSegmentation.ts +++ b/src/native/NativeImageSegmentation.ts @@ -4,7 +4,7 @@ import { TurboModuleRegistry } from 'react-native'; export interface Spec extends TurboModule { loadModule(modelSource: string): Promise; - forward(input: string): Promise; + forward(input: string): Promise<{ [category: string]: number[] }>; } export default TurboModuleRegistry.get('ImageSegmentation'); From 15070ba92bab567b33b7beb71426eb7e2bcaa33b Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Tue, 4 Mar 2025 13:21:43 +0100 Subject: [PATCH 03/16] Add arg max map to the segmentation result --- .../ImageSegmentationModel.h | 13 +++++++++++ .../ImageSegmentationModel.mm | 23 +++++++++++-------- .../image_segmentation/image_segmentation.ts | 23 +++++++++++++++++++ 3 files changed, 50 insertions(+), 9 deletions(-) create mode 100644 src/constants/image_segmentation/image_segmentation.ts diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 66dfb4b6..95535ba0 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -6,3 +6,16 @@ - (NSDictionary *)runModel:(cv::Mat &)input; @end + +template +NSArray* matToNSArray(const cv::Mat& mat) { + std::size_t numPixels = mat.rows * mat.cols; + NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels]; + + for (std::size_t x = 0; x < mat.rows; ++x) { + for (std::size_t y = 0; y < mat.cols; ++y) { + arr[x * mat.cols + y] = @(mat.at(x, y)); + } + } + return arr; +} \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index f504cf12..52707a4e 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -46,7 +46,6 @@ - (NSDictionary *)postprocess:(NSArray *)output { NSAssert(outputSize == numLabels * numModelPixels, @"Model generated unexpected output size."); - // For each label extract it's matrix and rescale it to the original size std::vector resizedLabelScores(numLabels); for (std::size_t label = 0; label < numLabels; ++label) { @@ -61,6 +60,8 @@ - (NSDictionary *)postprocess:(NSArray *)output { cv::resize(labelMat, resizedLabelScores[label], originalSize); } + cv::Mat maxArg = cv::Mat(originalSize, CV_32S); + // For each pixel apply softmax across all the labels for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { int row = pixel / originalSize.width; @@ -73,26 +74,30 @@ - (NSDictionary *)postprocess:(NSArray *)output { std::vector adjustedScores = softmax(scores); + std::size_t maxArgIndex = 0; + double maxArgVal = 0; for (std::size_t label = 0; label < numLabels; ++label) { resizedLabelScores[label].at(row, col) = adjustedScores[label]; + if (adjustedScores[label] > maxArgVal) { + maxArgIndex = label; + maxArgVal = adjustedScores[label]; + } } + + maxArg.at(row, col) = maxArgIndex; } NSMutableDictionary *result = [NSMutableDictionary dictionary]; + // Convert to NSArray and populate the final dictionary for (std::size_t label = 0; label < numLabels; ++label) { NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); - NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numOriginalPixels]; - - for (std::size_t x = 0; x < originalSize.height; ++x) { - for (std::size_t y = 0; y < originalSize.width; ++y) { - arr[x * originalSize.width + y] = @(resizedLabelScores[label].at(x, y)); - } - } - + NSMutableArray *arr = matToNSArray(resizedLabelScores[label]); result[labelString] = arr; } + result[@"argmax"] = matToNSArray(maxArg); + return result; } diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts new file mode 100644 index 00000000..b2f15f44 --- /dev/null +++ b/src/constants/image_segmentation/image_segmentation.ts @@ -0,0 +1,23 @@ +export const classLabels = new Map([ + [0, 'background'], + [1, 'aeroplane'], + [2, 'bicycle'], + [3, 'bird'], + [4, 'boat'], + [5, 'bottle'], + [6, 'bus'], + [7, 'car'], + [8, 'cat'], + [9, 'chair'], + [10, 'cow'], + [11, 'diningtable'], + [12, 'dog'], + [13, 'horse'], + [14, 'motorbike'], + [15, 'person'], + [16, 'pottedplant'], + [17, 'sheep'], + [18, 'sofa'], + [19, 'train'], + [20, 'tvmonitor'], +]); From ba058a6d9bf88c4ec01a437e1ee8e0ebb964265d Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 5 Mar 2025 12:14:27 +0100 Subject: [PATCH 04/16] Add a way to filter what segmentation classes are returned --- ios/RnExecutorch/ImageSegmentation.mm | 3 +- .../ImageSegmentationModel.h | 3 +- .../ImageSegmentationModel.mm | 28 ++++++--- .../computer_vision/useImageSegmentation.ts | 60 +++++++++++++++---- src/modules/computer_vision/BaseCVModule.ts | 2 - .../ImageSegmentationModule.ts | 19 ++++-- src/native/NativeImageSegmentation.ts | 5 +- src/native/RnExecutorchModules.ts | 5 +- 8 files changed, 91 insertions(+), 34 deletions(-) diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm index df972ab2..cf9366d5 100644 --- a/ios/RnExecutorch/ImageSegmentation.mm +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -34,12 +34,13 @@ - (void)loadModule:(NSString *)modelSource } - (void)forward:(NSString *)input + classesOfInterest:(NSArray *)classesOfInterest resolve:(RCTPromiseResolveBlock)resolve reject:(RCTPromiseRejectBlock)reject { @try { cv::Mat image = [ImageProcessor readImage:input]; - NSDictionary *result= [model runModel:image]; + NSDictionary *result = [model runModel:image returnClasses:classesOfInterest]; resolve(result); return; diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 95535ba0..91d1afc0 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -3,7 +3,8 @@ @interface ImageSegmentationModel : BaseModel - (cv::Size)getModelImageSize; -- (NSDictionary *)runModel:(cv::Mat &)input; +- (NSDictionary *)runModel:(cv::Mat &)input + returnClasses:(NSArray *)classesOfInterest; @end diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 52707a4e..9c8959b2 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -1,4 +1,5 @@ #import "ImageSegmentationModel.h" +#import #import "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" #import "opencv2/opencv.hpp" @@ -6,7 +7,8 @@ @interface ImageSegmentationModel () - (NSArray *)preprocess:(cv::Mat &)input; - - (NSDictionary *)postprocess:(NSArray *)output; + - (NSDictionary *)postprocess:(NSArray *)output + returnClasses:(NSArray *)classesOfInterest; @end @implementation ImageSegmentationModel { @@ -35,7 +37,8 @@ - (NSArray *)preprocess:(cv::Mat &)input { return modelInput; } -- (NSDictionary *)postprocess:(NSArray *)output { +- (NSDictionary *)postprocess:(NSArray *)output + returnClasses:(NSArray *)classesOfInterest{ cv::Size modelImageSize = [self getModelImageSize]; std::size_t numLabels = deeplabv3_resnet50_labels.size(); @@ -87,13 +90,21 @@ - (NSDictionary *)postprocess:(NSArray *)output { maxArg.at(row, col) = maxArgIndex; } + std::unordered_set labelSet; + + for (id label in classesOfInterest) { + labelSet.insert(std::string([label UTF8String])); + } + NSMutableDictionary *result = [NSMutableDictionary dictionary]; - + // Convert to NSArray and populate the final dictionary for (std::size_t label = 0; label < numLabels; ++label) { - NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); - NSMutableArray *arr = matToNSArray(resizedLabelScores[label]); - result[labelString] = arr; + if (labelSet.contains(deeplabv3_resnet50_labels[label])){ + NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); + NSArray *arr = matToNSArray(resizedLabelScores[label]); + result[labelString] = arr; + } } result[@"argmax"] = matToNSArray(maxArg); @@ -101,11 +112,12 @@ - (NSDictionary *)postprocess:(NSArray *)output { return result; } -- (NSDictionary *)runModel:(cv::Mat &)input { +- (NSDictionary *)runModel:(cv::Mat &)input + returnClasses:(NSArray *)classesOfInterest { NSArray *modelInput = [self preprocess:input]; NSArray *result = [self forward:modelInput]; - NSDictionary *output = [self postprocess:result[0]]; + NSDictionary *output = [self postprocess:result[0] returnClasses:classesOfInterest]; return output; } diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 746a45e0..5b583e5e 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -1,6 +1,7 @@ -import { useState } from 'react'; +import { useState, useEffect } from 'react'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; -import { useModule } from '../useModule'; +import { fetchResource } from '../../utils/fetchResource'; +import { ETError, getError } from '../../Error'; interface Props { modelSource: string | number; @@ -13,19 +14,52 @@ export const useImageSegmentation = ({ isReady: boolean; isGenerating: boolean; downloadProgress: number; - forward: (input: string) => Promise<{ [category: string]: number[] }>; + forward: ( + input: string, + classesOfInterest?: string[] + ) => Promise<{ [category: string]: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); - const { - error, - isReady, - isGenerating, - downloadProgress, - forwardImage: forward, - } = useModule({ - modelSource, - module, - }); + const [error, setError] = useState(null); + const [isReady, setIsReady] = useState(false); + const [downloadProgress, setDownloadProgress] = useState(0); + const [isGenerating, setIsGenerating] = useState(false); + + useEffect(() => { + const loadModel = async () => { + if (!modelSource) return; + + try { + setIsReady(false); + const fileUri = await fetchResource(modelSource, setDownloadProgress); + await module.loadModule(fileUri); + setIsReady(true); + } catch (e) { + setError(getError(e)); + } + }; + + loadModel(); + }, [modelSource, module]); + + const forward = async (input: string, classesOfInterest?: string[]) => { + if (!isReady) { + throw new Error(getError(ETError.ModuleNotLoaded)); + } + if (isGenerating) { + throw new Error(getError(ETError.ModelGenerating)); + } + + try { + setIsGenerating(true); + const output = await module.forward(input, classesOfInterest || []); + return output; + } catch (e) { + throw new Error(getError(e)); + } finally { + setIsGenerating(false); + } + }; return { error, isReady, isGenerating, downloadProgress, forward }; }; diff --git a/src/modules/computer_vision/BaseCVModule.ts b/src/modules/computer_vision/BaseCVModule.ts index 2702a6f9..c61987d3 100644 --- a/src/modules/computer_vision/BaseCVModule.ts +++ b/src/modules/computer_vision/BaseCVModule.ts @@ -3,7 +3,6 @@ import { _StyleTransferModule, _ObjectDetectionModule, _ClassificationModule, - _ImageSegmentationModule, } from '../../native/RnExecutorchModules'; import { getError } from '../../Error'; @@ -11,7 +10,6 @@ export class BaseCVModule extends BaseModule { static module: | _StyleTransferModule | _ObjectDetectionModule - | _ImageSegmentationModule | _ClassificationModule; static async forward(input: string) { diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index eae124a8..041894c1 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -1,12 +1,19 @@ -import { BaseCVModule } from './BaseCVModule'; +import { BaseModule } from '../BaseModule'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; +import { getError } from '../../Error'; -export class ImageSegmentationModule extends BaseCVModule { +export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); - static async forward(input: string) { - return await (super.forward(input) as ReturnType< - _ImageSegmentationModule['forward'] - >); + static async forward(input: string, classesOfInteres?: string[]) { + console.log('# classes: ', classesOfInteres?.length); + try { + return await (this.module.forward( + input, + classesOfInteres || [] + ) as ReturnType<_ImageSegmentationModule['forward']>); + } catch (e) { + throw new Error(getError(e)); + } } } diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts index c65b4cb7..ccff2731 100644 --- a/src/native/NativeImageSegmentation.ts +++ b/src/native/NativeImageSegmentation.ts @@ -4,7 +4,10 @@ import { TurboModuleRegistry } from 'react-native'; export interface Spec extends TurboModule { loadModule(modelSource: string): Promise; - forward(input: string): Promise<{ [category: string]: number[] }>; + forward( + input: string, + classesOfInterest: string[] + ): Promise<{ [category: string]: number[] }>; } export default TurboModuleRegistry.get('ImageSegmentation'); diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts index c48c08a3..eb6e7087 100644 --- a/src/native/RnExecutorchModules.ts +++ b/src/native/RnExecutorchModules.ts @@ -132,9 +132,10 @@ const VerticalOCR = VerticalOCRSpec class _ImageSegmentationModule { async forward( - input: string + input: string, + classesOfInteres: string[] ): ReturnType { - return await ImageSegmentation.forward(input); + return await ImageSegmentation.forward(input, classesOfInteres); } async loadModule( modelSource: string | number From 7d15e1bb336e3e27281515ca4f11130f5d3d6721 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 5 Mar 2025 13:29:50 +0100 Subject: [PATCH 05/16] Cleanup postprocess method --- .../ImageSegmentationModel.mm | 50 ++++++++++++------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 9c8959b2..de121b14 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -37,19 +37,10 @@ - (NSArray *)preprocess:(cv::Mat &)input { return modelInput; } -- (NSDictionary *)postprocess:(NSArray *)output - returnClasses:(NSArray *)classesOfInterest{ - cv::Size modelImageSize = [self getModelImageSize]; - - std::size_t numLabels = deeplabv3_resnet50_labels.size(); +std::vector rescaleResults(NSArray *result, std::size_t numLabels, + cv::Size modelImageSize, cv::Size originalSize) { std::size_t numModelPixels = modelImageSize.height * modelImageSize.width; - std::size_t numOriginalPixels = originalSize.height * originalSize.width; - std::size_t outputSize = (std::size_t)output.count; - - NSAssert(outputSize == numLabels * numModelPixels, - @"Model generated unexpected output size."); - // For each label extract it's matrix and rescale it to the original size std::vector resizedLabelScores(numLabels); for (std::size_t label = 0; label < numLabels; ++label) { cv::Mat labelMat = cv::Mat(modelImageSize, CV_64F); @@ -57,30 +48,32 @@ - (NSDictionary *)postprocess:(NSArray *)output for(std::size_t pixel = 0; pixel < numModelPixels; ++pixel){ int row = pixel / modelImageSize.width; int col = pixel % modelImageSize.width; - labelMat.at(row, col) = [output[label * numModelPixels + pixel] doubleValue]; + labelMat.at(row, col) = [result[label * numModelPixels + pixel] doubleValue]; } cv::resize(labelMat, resizedLabelScores[label], originalSize); } + return resizedLabelScores; +} - cv::Mat maxArg = cv::Mat(originalSize, CV_32S); - - // For each pixel apply softmax across all the labels +void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, + cv::Size originalSize, std::size_t numLabels) { + std::size_t numOriginalPixels = originalSize.height * originalSize.width; for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { int row = pixel / originalSize.width; int col = pixel % originalSize.width; std::vector scores; scores.reserve(numLabels); - for (const cv::Mat& mat : resizedLabelScores) { + for (const cv::Mat& mat : labelScores) { scores.push_back(mat.at(row, col)); } - + std::vector adjustedScores = softmax(scores); - + std::size_t maxArgIndex = 0; double maxArgVal = 0; for (std::size_t label = 0; label < numLabels; ++label) { - resizedLabelScores[label].at(row, col) = adjustedScores[label]; + labelScores[label].at(row, col) = adjustedScores[label]; if (adjustedScores[label] > maxArgVal) { maxArgIndex = label; maxArgVal = adjustedScores[label]; @@ -89,6 +82,25 @@ - (NSDictionary *)postprocess:(NSArray *)output maxArg.at(row, col) = maxArgIndex; } +} + +- (NSDictionary *)postprocess:(NSArray *)output + returnClasses:(NSArray *)classesOfInterest{ + cv::Size modelImageSize = [self getModelImageSize]; + + std::size_t numLabels = deeplabv3_resnet50_labels.size(); + + NSAssert((std::size_t)output.count == numLabels * modelImageSize.height * modelImageSize.width, + @"Model generated unexpected output size."); + + // For each label extract it's matrix and rescale it to the original size + std::vector resizedLabelScores = + rescaleResults(output, numLabels, modelImageSize, originalSize); + + cv::Mat maxArg = cv::Mat(originalSize, CV_32S); + + // For each pixel apply softmax across all the labels and calculate the maxArg + adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels); std::unordered_set labelSet; From 3f3a5047852c6cb75bb9c80fc00b00661e72e70d Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Fri, 7 Mar 2025 14:32:42 +0100 Subject: [PATCH 06/16] Move matrix functionality to utils --- .../ImageSegmentationModel.h | 15 +--------- .../ImageSegmentationModel.mm | 7 +++-- ios/RnExecutorch/utils/Conversions.h | 15 ++++++++++ .../computer_vision/useImageSegmentation.ts | 28 ++++--------------- .../ImageSegmentationModule.ts | 1 - 5 files changed, 26 insertions(+), 40 deletions(-) create mode 100644 ios/RnExecutorch/utils/Conversions.h diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 91d1afc0..40b39583 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -6,17 +6,4 @@ - (NSDictionary *)runModel:(cv::Mat &)input returnClasses:(NSArray *)classesOfInterest; -@end - -template -NSArray* matToNSArray(const cv::Mat& mat) { - std::size_t numPixels = mat.rows * mat.cols; - NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels]; - - for (std::size_t x = 0; x < mat.rows; ++x) { - for (std::size_t y = 0; y < mat.cols; ++y) { - arr[x * mat.cols + y] = @(mat.at(x, y)); - } - } - return arr; -} \ No newline at end of file +@end \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index de121b14..1dc9fb7b 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -2,6 +2,7 @@ #import #import "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" +#import "../../utils/Conversions.h" #import "opencv2/opencv.hpp" #import "Constants.h" @@ -64,7 +65,7 @@ void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, int col = pixel % originalSize.width; std::vector scores; scores.reserve(numLabels); - for (const cv::Mat& mat : labelScores) { + for (const auto& mat : labelScores) { scores.push_back(mat.at(row, col)); } @@ -114,12 +115,12 @@ - (NSDictionary *)postprocess:(NSArray *)output for (std::size_t label = 0; label < numLabels; ++label) { if (labelSet.contains(deeplabv3_resnet50_labels[label])){ NSString *labelString = @(deeplabv3_resnet50_labels[label].c_str()); - NSArray *arr = matToNSArray(resizedLabelScores[label]); + NSArray *arr = simpleMatToNSArray(resizedLabelScores[label]); result[labelString] = arr; } } - result[@"argmax"] = matToNSArray(maxArg); + result[@"argmax"] = simpleMatToNSArray(maxArg); return result; } diff --git a/ios/RnExecutorch/utils/Conversions.h b/ios/RnExecutorch/utils/Conversions.h new file mode 100644 index 00000000..a83ec5fb --- /dev/null +++ b/ios/RnExecutorch/utils/Conversions.h @@ -0,0 +1,15 @@ +#import "opencv2/opencv.hpp" + +// Convert a matrix containing a single value per cell to a NSArray +template +NSArray* simpleMatToNSArray(const cv::Mat& mat) { + std::size_t numPixels = mat.rows * mat.cols; + NSMutableArray *arr = [[NSMutableArray alloc] initWithCapacity:numPixels]; + + for (std::size_t x = 0; x < mat.rows; ++x) { + for (std::size_t y = 0; y < mat.cols; ++y) { + arr[x * mat.cols + y] = @(mat.at(x, y)); + } + } + return arr; +} diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 5b583e5e..7b650d30 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -1,7 +1,7 @@ -import { useState, useEffect } from 'react'; +import { useState } from 'react'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; -import { fetchResource } from '../../utils/fetchResource'; import { ETError, getError } from '../../Error'; +import { useModule } from '../useModule'; interface Props { modelSource: string | number; @@ -20,27 +20,11 @@ export const useImageSegmentation = ({ ) => Promise<{ [category: string]: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); - const [error, setError] = useState(null); - const [isReady, setIsReady] = useState(false); - const [downloadProgress, setDownloadProgress] = useState(0); const [isGenerating, setIsGenerating] = useState(false); - - useEffect(() => { - const loadModel = async () => { - if (!modelSource) return; - - try { - setIsReady(false); - const fileUri = await fetchResource(modelSource, setDownloadProgress); - await module.loadModule(fileUri); - setIsReady(true); - } catch (e) { - setError(getError(e)); - } - }; - - loadModel(); - }, [modelSource, module]); + const { error, isReady, downloadProgress } = useModule({ + modelSource, + module, + }); const forward = async (input: string, classesOfInterest?: string[]) => { if (!isReady) { diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index 041894c1..f6646feb 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -6,7 +6,6 @@ export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); static async forward(input: string, classesOfInteres?: string[]) { - console.log('# classes: ', classesOfInteres?.length); try { return await (this.module.forward( input, From 11e6a852350b64f705e9f81c9f7b87fe635f9465 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Mon, 10 Mar 2025 11:34:30 +0100 Subject: [PATCH 07/16] Add label enum to segmentation I/O --- .../ImageSegmentationModel.mm | 23 ++++++++---------- .../image_segmentation/image_segmentation.ts | 23 ------------------ .../computer_vision/useImageSegmentation.ts | 23 ++++++++++++++---- src/index.tsx | 1 + .../ImageSegmentationModule.ts | 9 ++++--- src/types/image_segmentation.ts | 24 +++++++++++++++++++ 6 files changed, 57 insertions(+), 46 deletions(-) delete mode 100644 src/constants/image_segmentation/image_segmentation.ts create mode 100644 src/types/image_segmentation.ts diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 1dc9fb7b..7aa10a9f 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -1,6 +1,8 @@ #import "ImageSegmentationModel.h" #import -#import "../../utils/ImageProcessor.h" +#import +#import +#i\port "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" #import "../../utils/Conversions.h" #import "opencv2/opencv.hpp" @@ -57,7 +59,7 @@ - (NSArray *)preprocess:(cv::Mat &)input { return resizedLabelScores; } -void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, +void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& argMax, cv::Size originalSize, std::size_t numLabels) { std::size_t numOriginalPixels = originalSize.height * originalSize.width; for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { @@ -71,17 +73,12 @@ void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& maxArg, std::vector adjustedScores = softmax(scores); - std::size_t maxArgIndex = 0; - double maxArgVal = 0; for (std::size_t label = 0; label < numLabels; ++label) { labelScores[label].at(row, col) = adjustedScores[label]; - if (adjustedScores[label] > maxArgVal) { - maxArgIndex = label; - maxArgVal = adjustedScores[label]; - } } - maxArg.at(row, col) = maxArgIndex; + auto maxIt = std::max_element(scores.begin(), scores.end()); + argMax.at(row, col) = std::distance(scores.begin(), maxIt); } } @@ -98,10 +95,10 @@ - (NSDictionary *)postprocess:(NSArray *)output std::vector resizedLabelScores = rescaleResults(output, numLabels, modelImageSize, originalSize); - cv::Mat maxArg = cv::Mat(originalSize, CV_32S); + cv::Mat argMax = cv::Mat(originalSize, CV_32S); - // For each pixel apply softmax across all the labels and calculate the maxArg - adjustScoresPerPixel(resizedLabelScores, maxArg, originalSize, numLabels); + // For each pixel apply softmax across all the labels and calculate the argMax + adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels); std::unordered_set labelSet; @@ -120,7 +117,7 @@ - (NSDictionary *)postprocess:(NSArray *)output } } - result[@"argmax"] = simpleMatToNSArray(maxArg); + result[@"argmax"] = simpleMatToNSArray(argMax); return result; } diff --git a/src/constants/image_segmentation/image_segmentation.ts b/src/constants/image_segmentation/image_segmentation.ts deleted file mode 100644 index b2f15f44..00000000 --- a/src/constants/image_segmentation/image_segmentation.ts +++ /dev/null @@ -1,23 +0,0 @@ -export const classLabels = new Map([ - [0, 'background'], - [1, 'aeroplane'], - [2, 'bicycle'], - [3, 'bird'], - [4, 'boat'], - [5, 'bottle'], - [6, 'bus'], - [7, 'car'], - [8, 'cat'], - [9, 'chair'], - [10, 'cow'], - [11, 'diningtable'], - [12, 'dog'], - [13, 'horse'], - [14, 'motorbike'], - [15, 'person'], - [16, 'pottedplant'], - [17, 'sheep'], - [18, 'sofa'], - [19, 'train'], - [20, 'tvmonitor'], -]); diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 7b650d30..43bd98f1 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -2,6 +2,7 @@ import { useState } from 'react'; import { _ImageSegmentationModule } from '../../native/RnExecutorchModules'; import { ETError, getError } from '../../Error'; import { useModule } from '../useModule'; +import { DeeplabLabel } from '../../types/image_segmentation'; interface Props { modelSource: string | number; @@ -16,8 +17,8 @@ export const useImageSegmentation = ({ downloadProgress: number; forward: ( input: string, - classesOfInterest?: string[] - ) => Promise<{ [category: string]: number[] }>; + classesOfInterest?: DeeplabLabel[] + ) => Promise<{ [key in DeeplabLabel]?: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); const [isGenerating, setIsGenerating] = useState(false); @@ -26,7 +27,7 @@ export const useImageSegmentation = ({ module, }); - const forward = async (input: string, classesOfInterest?: string[]) => { + const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => { if (!isReady) { throw new Error(getError(ETError.ModuleNotLoaded)); } @@ -36,8 +37,20 @@ export const useImageSegmentation = ({ try { setIsGenerating(true); - const output = await module.forward(input, classesOfInterest || []); - return output; + const stringDict = await module.forward( + input, + (classesOfInterest || []).map((label) => DeeplabLabel[label]) + ); + + let enumDict: { [key in DeeplabLabel]?: number[] } = {}; + + for (const key in stringDict) { + if (key in DeeplabLabel) { + const enumKey = DeeplabLabel[key as keyof typeof DeeplabLabel]; + enumDict[enumKey] = stringDict[key]; + } + } + return enumDict; } catch (e) { throw new Error(getError(e)); } finally { diff --git a/src/index.tsx b/src/index.tsx index 8d322696..c4ae2f55 100644 --- a/src/index.tsx +++ b/src/index.tsx @@ -30,6 +30,7 @@ export * from './utils/listDownloadedResources'; // types export * from './types/object_detection'; export * from './types/ocr'; +export * from './types/image_segmentation'; // constants export * from './constants/modelUrls'; diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index f6646feb..f2a6a167 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -5,12 +5,11 @@ import { getError } from '../../Error'; export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); - static async forward(input: string, classesOfInteres?: string[]) { + static async forward(input: string, classesOfInterest: string[]) { try { - return await (this.module.forward( - input, - classesOfInteres || [] - ) as ReturnType<_ImageSegmentationModule['forward']>); + return await (this.module.forward(input, classesOfInterest) as ReturnType< + _ImageSegmentationModule['forward'] + >); } catch (e) { throw new Error(getError(e)); } diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts new file mode 100644 index 00000000..7d03d517 --- /dev/null +++ b/src/types/image_segmentation.ts @@ -0,0 +1,24 @@ +export enum DeeplabLabel { + background, + aeroplane, + bicycle, + bird, + boat, + bottle, + bus, + car, + cat, + chair, + cow, + diningtable, + dog, + horse, + motorbike, + person, + pottedplant, + sheep, + sofa, + train, + tvmonitor, + argmax, // Additional label not present in the model +} From dcc20089aae67e672ef94291ef386c7d931427ce Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Mon, 10 Mar 2025 12:30:04 +0100 Subject: [PATCH 08/16] Add optionality for segmentation output resize --- ios/RnExecutorch/ImageSegmentation.mm | 9 ++-- .../ImageSegmentationModel.h | 3 +- .../ImageSegmentationModel.mm | 46 ++++++++++++------- .../computer_vision/useImageSegmentation.ts | 12 +++-- .../ImageSegmentationModule.ts | 14 ++++-- src/native/NativeImageSegmentation.ts | 3 +- src/native/RnExecutorchModules.ts | 5 +- 7 files changed, 61 insertions(+), 31 deletions(-) diff --git a/ios/RnExecutorch/ImageSegmentation.mm b/ios/RnExecutorch/ImageSegmentation.mm index cf9366d5..19cbe664 100644 --- a/ios/RnExecutorch/ImageSegmentation.mm +++ b/ios/RnExecutorch/ImageSegmentation.mm @@ -35,13 +35,16 @@ - (void)loadModule:(NSString *)modelSource - (void)forward:(NSString *)input classesOfInterest:(NSArray *)classesOfInterest + resize:(BOOL)resize resolve:(RCTPromiseResolveBlock)resolve - reject:(RCTPromiseRejectBlock)reject { + reject:(RCTPromiseRejectBlock)reject { @try { cv::Mat image = [ImageProcessor readImage:input]; - NSDictionary *result = [model runModel:image returnClasses:classesOfInterest]; - + NSDictionary *result = [model runModel:image + returnClasses:classesOfInterest + resize:resize]; + resolve(result); return; } @catch (NSException *exception) { diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h index 40b39583..a58733a1 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.h @@ -4,6 +4,7 @@ @interface ImageSegmentationModel : BaseModel - (cv::Size)getModelImageSize; - (NSDictionary *)runModel:(cv::Mat &)input - returnClasses:(NSArray *)classesOfInterest; + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize; @end \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 7aa10a9f..70638bd4 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -2,7 +2,7 @@ #import #import #import -#i\port "../../utils/ImageProcessor.h" +#import "../../utils/ImageProcessor.h" #import "../../utils/Numerical.h" #import "../../utils/Conversions.h" #import "opencv2/opencv.hpp" @@ -11,7 +11,8 @@ @interface ImageSegmentationModel () - (NSArray *)preprocess:(cv::Mat &)input; - (NSDictionary *)postprocess:(NSArray *)output - returnClasses:(NSArray *)classesOfInterest; + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize; @end @implementation ImageSegmentationModel { @@ -40,8 +41,8 @@ - (NSArray *)preprocess:(cv::Mat &)input { return modelInput; } -std::vector rescaleResults(NSArray *result, std::size_t numLabels, - cv::Size modelImageSize, cv::Size originalSize) { +std::vector extractResults(NSArray *result, std::size_t numLabels, + cv::Size modelImageSize, cv::Size originalSize, BOOL resize) { std::size_t numModelPixels = modelImageSize.height * modelImageSize.width; std::vector resizedLabelScores(numLabels); @@ -54,17 +55,22 @@ - (NSArray *)preprocess:(cv::Mat &)input { labelMat.at(row, col) = [result[label * numModelPixels + pixel] doubleValue]; } - cv::resize(labelMat, resizedLabelScores[label], originalSize); + if (resize) { + cv::resize(labelMat, resizedLabelScores[label], originalSize); + } + else { + resizedLabelScores[label] = std::move(labelMat); + } } return resizedLabelScores; } void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& argMax, - cv::Size originalSize, std::size_t numLabels) { - std::size_t numOriginalPixels = originalSize.height * originalSize.width; - for (std::size_t pixel = 0; pixel < numOriginalPixels; ++pixel) { - int row = pixel / originalSize.width; - int col = pixel % originalSize.width; + cv::Size outputSize, std::size_t numLabels) { + std::size_t numOutputPixels = outputSize.height * outputSize.width; + for (std::size_t pixel = 0; pixel < numOutputPixels; ++pixel) { + int row = pixel / outputSize.width; + int col = pixel % outputSize.width; std::vector scores; scores.reserve(numLabels); for (const auto& mat : labelScores) { @@ -83,7 +89,8 @@ void adjustScoresPerPixel(std::vector& labelScores, cv::Mat& argMax, } - (NSDictionary *)postprocess:(NSArray *)output - returnClasses:(NSArray *)classesOfInterest{ + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize { cv::Size modelImageSize = [self getModelImageSize]; std::size_t numLabels = deeplabv3_resnet50_labels.size(); @@ -91,14 +98,16 @@ - (NSDictionary *)postprocess:(NSArray *)output NSAssert((std::size_t)output.count == numLabels * modelImageSize.height * modelImageSize.width, @"Model generated unexpected output size."); - // For each label extract it's matrix and rescale it to the original size + // For each label extract it's matrix, + // and rescale it to the original size if `resize` std::vector resizedLabelScores = - rescaleResults(output, numLabels, modelImageSize, originalSize); + extractResults(output, numLabels, modelImageSize, originalSize, resize); - cv::Mat argMax = cv::Mat(originalSize, CV_32S); + cv::Size outputSize = resize ? originalSize : modelImageSize; + cv::Mat argMax = cv::Mat(outputSize, CV_32S); // For each pixel apply softmax across all the labels and calculate the argMax - adjustScoresPerPixel(resizedLabelScores, argMax, originalSize, numLabels); + adjustScoresPerPixel(resizedLabelScores, argMax, outputSize, numLabels); std::unordered_set labelSet; @@ -123,11 +132,14 @@ - (NSDictionary *)postprocess:(NSArray *)output } - (NSDictionary *)runModel:(cv::Mat &)input - returnClasses:(NSArray *)classesOfInterest { + returnClasses:(NSArray *)classesOfInterest + resize:(BOOL)resize { NSArray *modelInput = [self preprocess:input]; NSArray *result = [self forward:modelInput]; - NSDictionary *output = [self postprocess:result[0] returnClasses:classesOfInterest]; + NSDictionary *output = [self postprocess:result[0] + returnClasses:classesOfInterest + resize:resize]; return output; } diff --git a/src/hooks/computer_vision/useImageSegmentation.ts b/src/hooks/computer_vision/useImageSegmentation.ts index 43bd98f1..4e562d6b 100644 --- a/src/hooks/computer_vision/useImageSegmentation.ts +++ b/src/hooks/computer_vision/useImageSegmentation.ts @@ -17,7 +17,8 @@ export const useImageSegmentation = ({ downloadProgress: number; forward: ( input: string, - classesOfInterest?: DeeplabLabel[] + classesOfInterest?: DeeplabLabel[], + resize?: boolean ) => Promise<{ [key in DeeplabLabel]?: number[] }>; } => { const [module, _] = useState(() => new _ImageSegmentationModule()); @@ -27,7 +28,11 @@ export const useImageSegmentation = ({ module, }); - const forward = async (input: string, classesOfInterest?: DeeplabLabel[]) => { + const forward = async ( + input: string, + classesOfInterest?: DeeplabLabel[], + resize?: boolean + ) => { if (!isReady) { throw new Error(getError(ETError.ModuleNotLoaded)); } @@ -39,7 +44,8 @@ export const useImageSegmentation = ({ setIsGenerating(true); const stringDict = await module.forward( input, - (classesOfInterest || []).map((label) => DeeplabLabel[label]) + (classesOfInterest || []).map((label) => DeeplabLabel[label]), + resize || false ); let enumDict: { [key in DeeplabLabel]?: number[] } = {}; diff --git a/src/modules/computer_vision/ImageSegmentationModule.ts b/src/modules/computer_vision/ImageSegmentationModule.ts index f2a6a167..1d078c1c 100644 --- a/src/modules/computer_vision/ImageSegmentationModule.ts +++ b/src/modules/computer_vision/ImageSegmentationModule.ts @@ -5,11 +5,17 @@ import { getError } from '../../Error'; export class ImageSegmentationModule extends BaseModule { static module = new _ImageSegmentationModule(); - static async forward(input: string, classesOfInterest: string[]) { + static async forward( + input: string, + classesOfInterest: string[], + resize: boolean + ) { try { - return await (this.module.forward(input, classesOfInterest) as ReturnType< - _ImageSegmentationModule['forward'] - >); + return await (this.module.forward( + input, + classesOfInterest, + resize + ) as ReturnType<_ImageSegmentationModule['forward']>); } catch (e) { throw new Error(getError(e)); } diff --git a/src/native/NativeImageSegmentation.ts b/src/native/NativeImageSegmentation.ts index ccff2731..c66c8743 100644 --- a/src/native/NativeImageSegmentation.ts +++ b/src/native/NativeImageSegmentation.ts @@ -6,7 +6,8 @@ export interface Spec extends TurboModule { forward( input: string, - classesOfInterest: string[] + classesOfInterest: string[], + resize: boolean ): Promise<{ [category: string]: number[] }>; } diff --git a/src/native/RnExecutorchModules.ts b/src/native/RnExecutorchModules.ts index eb6e7087..62ebd309 100644 --- a/src/native/RnExecutorchModules.ts +++ b/src/native/RnExecutorchModules.ts @@ -133,9 +133,10 @@ const VerticalOCR = VerticalOCRSpec class _ImageSegmentationModule { async forward( input: string, - classesOfInteres: string[] + classesOfInteres: string[], + resize: boolean ): ReturnType { - return await ImageSegmentation.forward(input, classesOfInteres); + return await ImageSegmentation.forward(input, classesOfInteres, resize); } async loadModule( modelSource: string | number From 0405cb0988ecd038be42a454ac5609cd1601a683 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 12 Mar 2025 15:12:41 +0100 Subject: [PATCH 09/16] Change segmentation enum values to upper case --- .../models/image_segmentation/Constants.mm | 8 ++-- .../ImageSegmentationModel.mm | 2 +- src/types/image_segmentation.ts | 44 +++++++++---------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/ios/RnExecutorch/models/image_segmentation/Constants.mm b/ios/RnExecutorch/models/image_segmentation/Constants.mm index 4d98f34d..84ce9ea6 100644 --- a/ios/RnExecutorch/models/image_segmentation/Constants.mm +++ b/ios/RnExecutorch/models/image_segmentation/Constants.mm @@ -3,8 +3,8 @@ #import const std::vector deeplabv3_resnet50_labels = { - "background", "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", - "dog", "horse", "motorbike", "person", "pottedplant", "sheep", - "sofa", "train", "tvmonitor" + "BACKGROUND", "AEROPLANE", "BICYCLE", "BIRD", "BOAT", + "BOTTLE", "BUS", "CAR", "CAT", "CHAIR", "COW", "DININGTABLE", + "DOG", "HORSE", "MOTORBIKE", "PERSON", "POTTEDPLANT", "SHEEP", + "SOFA", "TRAIN", "TVMONITOR" }; \ No newline at end of file diff --git a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm index 70638bd4..951687c5 100644 --- a/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm +++ b/ios/RnExecutorch/models/image_segmentation/ImageSegmentationModel.mm @@ -126,7 +126,7 @@ - (NSDictionary *)postprocess:(NSArray *)output } } - result[@"argmax"] = simpleMatToNSArray(argMax); + result[@"ARGMAX"] = simpleMatToNSArray(argMax); return result; } diff --git a/src/types/image_segmentation.ts b/src/types/image_segmentation.ts index 7d03d517..bc7d254d 100644 --- a/src/types/image_segmentation.ts +++ b/src/types/image_segmentation.ts @@ -1,24 +1,24 @@ export enum DeeplabLabel { - background, - aeroplane, - bicycle, - bird, - boat, - bottle, - bus, - car, - cat, - chair, - cow, - diningtable, - dog, - horse, - motorbike, - person, - pottedplant, - sheep, - sofa, - train, - tvmonitor, - argmax, // Additional label not present in the model + BACKGROUND, + AEROPLANE, + BICYCLE, + BIRD, + BOAT, + BOTTLE, + BUS, + CAR, + CAT, + CHAIR, + COW, + DININGTABLE, + DOG, + HORSE, + MOTORBIKE, + PERSON, + POTTEDPLANT, + SHEEP, + SOFA, + TRAIN, + TVMONITOR, + ARGMAX, // Additional label not present in the model } From 9d4efb8d1ca6c0bebd84cef2074872cd87558964 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 5 Mar 2025 14:36:03 +0100 Subject: [PATCH 10/16] Move style transfer model --- .../src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt | 2 +- .../models/{ => styleTransfer}/StyleTransferModel.kt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename android/src/main/java/com/swmansion/rnexecutorch/models/{ => styleTransfer}/StyleTransferModel.kt (96%) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt b/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt index 54132b88..224794e1 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/StyleTransfer.kt @@ -3,7 +3,7 @@ package com.swmansion.rnexecutorch import android.util.Log import com.facebook.react.bridge.Promise import com.facebook.react.bridge.ReactApplicationContext -import com.swmansion.rnexecutorch.models.StyleTransferModel +import com.swmansion.rnexecutorch.models.styletransfer.StyleTransferModel import com.swmansion.rnexecutorch.utils.ETError import com.swmansion.rnexecutorch.utils.ImageProcessor import org.opencv.android.OpenCVLoader diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/StyleTransferModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt similarity index 96% rename from android/src/main/java/com/swmansion/rnexecutorch/models/StyleTransferModel.kt rename to android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt index 72d3bc6d..4c381caa 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/StyleTransferModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt @@ -1,4 +1,4 @@ -package com.swmansion.rnexecutorch.models +package com.swmansion.rnexecutorch.models.styletransfer import com.facebook.react.bridge.ReactApplicationContext import com.swmansion.rnexecutorch.utils.ImageProcessor From 60ae07bf536cde90383ec91501aee216086dd86a Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Fri, 7 Mar 2025 08:10:19 +0100 Subject: [PATCH 11/16] Move the softmax implementation --- .../{models/classification/Utils.kt => utils/Numerical.kt} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename android/src/main/java/com/swmansion/rnexecutorch/{models/classification/Utils.kt => utils/Numerical.kt} (77%) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Utils.kt b/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt similarity index 77% rename from android/src/main/java/com/swmansion/rnexecutorch/models/classification/Utils.kt rename to android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt index e919950a..603699e3 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/classification/Utils.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/utils/Numerical.kt @@ -1,4 +1,4 @@ -package com.swmansion.rnexecutorch.models.classification +package com.swmansion.rnexecutorch.utils fun softmax(x: Array): Array { val max = x.maxOrNull()!! From 899e440509f35cc6e090fa8525df7d7af5fff0e8 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Fri, 7 Mar 2025 11:15:05 +0100 Subject: [PATCH 12/16] Add image segmentation model logic --- .../rnexecutorch/ImageSegmentation.kt | 52 ++++++++ .../rnexecutorch/RnExecutorchPackage.kt | 9 ++ .../classification/ClassificationModel.kt | 1 + .../models/imageSegmentation/Constants.kt | 8 ++ .../ImageSegmentationModel.kt | 120 ++++++++++++++++++ .../styleTransfer/StyleTransferModel.kt | 1 + 6 files changed, 191 insertions(+) create mode 100644 android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt create mode 100644 android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt create mode 100644 android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt diff --git a/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt new file mode 100644 index 00000000..e2f83e44 --- /dev/null +++ b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt @@ -0,0 +1,52 @@ +package com.swmansion.rnexecutorch + +import android.util.Log +import com.facebook.react.bridge.Promise +import com.facebook.react.bridge.ReadableArray +import com.facebook.react.bridge.ReactApplicationContext +import com.swmansion.rnexecutorch.utils.ETError +import com.swmansion.rnexecutorch.models.imagesegmentation.ImageSegmentationModel +import com.swmansion.rnexecutorch.utils.ImageProcessor +import org.opencv.android.OpenCVLoader + +class ImageSegmentation(reactContext: ReactApplicationContext) : + NativeImageSegmentationSpec(reactContext) { + + private lateinit var model: ImageSegmentationModel + + companion object { + const val NAME = "ImageSegmentation" + + init { + if(!OpenCVLoader.initLocal()){ + Log.d("rn_executorch", "OpenCV not loaded") + } else { + Log.d("rn_executorch", "OpenCV loaded") + } + } + } + + override fun loadModule(modelSource: String, promise: Promise) { + try { + model = ImageSegmentationModel(reactApplicationContext) + model.loadModel(modelSource) + promise.resolve(0) + } catch (e: Exception) { + promise.reject(e.message!!, ETError.InvalidModelSource.toString()) + } + } + + override fun forward(input: String, classesOfInterest: ReadableArray, promise: Promise) { + try { + val output = + model.runModel(Pair(ImageProcessor.readImage(input), classesOfInterest)) + promise.resolve(output) + }catch(e: Exception){ + promise.reject(e.message!!, e.message) + } + } + + override fun getName(): String { + return NAME + } +} diff --git a/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt b/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt index c88e3870..3c78d4d7 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt @@ -30,6 +30,8 @@ class RnExecutorchPackage : TurboReactPackage() { OCR(reactContext) } else if (name == VerticalOCR.NAME) { VerticalOCR(reactContext) + } else if (name == ImageSegmentation.NAME) { + ImageSegmentation(reactContext) } else { null } @@ -115,6 +117,13 @@ class RnExecutorchPackage : TurboReactPackage() { false, // isCxxModule true, ) + + moduleInfos[ImageSegmentation.NAME] = ReactModuleInfo( + ImageSegmentation.NAME, ImageSegmentation.NAME, false, // canOverrideExistingModule + false, // needsEagerInit + false, // isCxxModule + true + ) moduleInfos } } diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt index b60b0998..776f9a53 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt @@ -3,6 +3,7 @@ package com.swmansion.rnexecutorch.models.classification import com.facebook.react.bridge.ReactApplicationContext import com.swmansion.rnexecutorch.models.BaseModel import com.swmansion.rnexecutorch.utils.ImageProcessor +import com.swmansion.rnexecutorch.utils.softmax import org.opencv.core.Mat import org.opencv.core.Size import org.opencv.imgproc.Imgproc diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt new file mode 100644 index 00000000..e1f127b1 --- /dev/null +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt @@ -0,0 +1,8 @@ +package com.swmansion.rnexecutorch.models.imagesegmentation + +val deeplabv3_resnet50_labels: Array = arrayOf( + "background", "aeroplane", "bicycle", "bird", "boat", + "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", + "dog", "horse", "motorbike", "person", "pottedplant", "sheep", + "sofa", "train", "tvmonitor" +) \ No newline at end of file diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt new file mode 100644 index 00000000..89f4034d --- /dev/null +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt @@ -0,0 +1,120 @@ +package com.swmansion.rnexecutorch.models.imagesegmentation + +import com.facebook.react.bridge.ReadableArray +import com.facebook.react.bridge.ReactApplicationContext +import com.swmansion.rnexecutorch.utils.ImageProcessor +import com.swmansion.rnexecutorch.utils.softmax +import org.opencv.core.Mat +import org.opencv.core.CvType +import org.opencv.core.Size +import org.opencv.imgproc.Imgproc +import org.pytorch.executorch.Tensor +import org.pytorch.executorch.EValue +import com.swmansion.rnexecutorch.models.BaseModel + +class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) + : BaseModel , Map>>(reactApplicationContext) { + private lateinit var originalSize: Size + + private fun getModelImageSize(): Size { + val inputShape = module.getInputShape(0) + val width = inputShape[inputShape.lastIndex] + val height = inputShape[inputShape.lastIndex - 1] + + return Size(height.toDouble(), width.toDouble()) + } + + override fun preprocess(input: Pair): EValue { + originalSize = input.first.size() + Imgproc.resize(input.first, input.first, getModelImageSize()) + return ImageProcessor.matToEValue(input.first, module.getInputShape(0)) + } + + private fun rescaleResults(result: Array, numLabels: Int) + : List { + val modelShape = getModelImageSize() + val numModelPixels = (modelShape.height * modelShape.width).toInt() + + val resizedLabelScores = mutableListOf() + + for (label in 0.., numLabels: Int) + : Mat { + val argMax = Mat(originalSize, CvType.CV_32S) + val numOriginalPixels = (originalSize.height * originalSize.width).toInt() + for (pixel in 0..() + for (mat in labelScores) { + val v = FloatArray(1) + mat.get(row, col, v) + scores.add(v[0]) + } + + val adjustedScores = softmax(scores.toTypedArray()) + + for (label in 0..): Map> { + val output = output[0].toTensor().dataAsFloatArray.toTypedArray() + val modelShape = getModelImageSize() + val numLabels = deeplabv3_resnet50_labels.size; + val numOriginalPixels = (originalSize.height * originalSize.width).toInt() + + require(output.count() == (numLabels * modelShape.height * modelShape.width).toInt()) + {"Model generated unexpected output size."} + + val rescaledResults = rescaleResults(output, numLabels) + + val argMax = adjustScoresPerPixel(rescaledResults, numLabels) + + // val labelSet = mutableSetOf() + // Filter by the label set when base class changed + + val res = mutableMapOf>() + + for (label in 0..): Map> { + val modelInput = preprocess(input) + val modelOutput = forward(modelInput) + return postprocess(modelOutput) + } +} diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt index 4c381caa..4019015d 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt @@ -6,6 +6,7 @@ import org.opencv.core.Mat import org.opencv.core.Size import org.opencv.imgproc.Imgproc import org.pytorch.executorch.EValue +import com.swmansion.rnexecutorch.models.BaseModel class StyleTransferModel( reactApplicationContext: ReactApplicationContext, From d780a874a5db78ed63b95d96c3f6748070fa2437 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Tue, 11 Mar 2025 10:21:41 +0100 Subject: [PATCH 13/16] Adapt segmentation bridge types --- .../rnexecutorch/ImageSegmentation.kt | 6 ++- .../ImageSegmentationModel.kt | 48 ++++++++++++------- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt index e2f83e44..22f40c3f 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt @@ -2,6 +2,7 @@ package com.swmansion.rnexecutorch import android.util.Log import com.facebook.react.bridge.Promise +import com.facebook.react.bridge.WritableMap import com.facebook.react.bridge.ReadableArray import com.facebook.react.bridge.ReactApplicationContext import com.swmansion.rnexecutorch.utils.ETError @@ -36,7 +37,10 @@ class ImageSegmentation(reactContext: ReactApplicationContext) : } } - override fun forward(input: String, classesOfInterest: ReadableArray, promise: Promise) { + override fun forward(input: String, + classesOfInterest: ReadableArray, + resize:Boolean, + promise: Promise) { try { val output = model.runModel(Pair(ImageProcessor.readImage(input), classesOfInterest)) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt index 89f4034d..70c657f9 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt @@ -1,6 +1,8 @@ package com.swmansion.rnexecutorch.models.imagesegmentation +import com.facebook.react.bridge.Arguments; import com.facebook.react.bridge.ReadableArray +import com.facebook.react.bridge.WritableMap import com.facebook.react.bridge.ReactApplicationContext import com.swmansion.rnexecutorch.utils.ImageProcessor import com.swmansion.rnexecutorch.utils.softmax @@ -11,9 +13,10 @@ import org.opencv.imgproc.Imgproc import org.pytorch.executorch.Tensor import org.pytorch.executorch.EValue import com.swmansion.rnexecutorch.models.BaseModel +import com.swmansion.rnexecutorch.utils.ArrayUtils class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) - : BaseModel , Map>>(reactApplicationContext) { + : BaseModel , WritableMap>(reactApplicationContext) { private lateinit var originalSize: Size private fun getModelImageSize(): Size { @@ -58,30 +61,28 @@ class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) : Mat { val argMax = Mat(originalSize, CvType.CV_32S) val numOriginalPixels = (originalSize.height * originalSize.width).toInt() + android.util.Log.d("ETTT", "adjustScoresPerPixel: start") for (pixel in 0..() for (mat in labelScores) { - val v = FloatArray(1) - mat.get(row, col, v) - scores.add(v[0]) + scores.add(mat.get(row, col)[0].toFloat()) } - val adjustedScores = softmax(scores.toTypedArray()) - for (label in 0..): Map> { + override fun postprocess(output: Array) + : WritableMap { val output = output[0].toTensor().dataAsFloatArray.toTypedArray() val modelShape = getModelImageSize() val numLabels = deeplabv3_resnet50_labels.size; @@ -93,26 +94,37 @@ class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) val rescaledResults = rescaleResults(output, numLabels) val argMax = adjustScoresPerPixel(rescaledResults, numLabels) - + // val labelSet = mutableSetOf() // Filter by the label set when base class changed - val res = mutableMapOf>() - + val res = Arguments.createMap() + for (label in 0..): Map> { + override fun runModel(input: Pair) + : WritableMap { val modelInput = preprocess(input) val modelOutput = forward(modelInput) return postprocess(modelOutput) From 209a8ec5436a8f7d84e6069ffb35dc5359a343fb Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Tue, 11 Mar 2025 11:21:36 +0100 Subject: [PATCH 14/16] Add optional resize for Android segmentation --- .../rnexecutorch/ImageSegmentation.kt | 36 ++--- .../ImageSegmentationModel.kt | 141 ++++++++++-------- 2 files changed, 100 insertions(+), 77 deletions(-) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt index 22f40c3f..c18fa8ed 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt @@ -2,24 +2,23 @@ package com.swmansion.rnexecutorch import android.util.Log import com.facebook.react.bridge.Promise -import com.facebook.react.bridge.WritableMap -import com.facebook.react.bridge.ReadableArray import com.facebook.react.bridge.ReactApplicationContext -import com.swmansion.rnexecutorch.utils.ETError +import com.facebook.react.bridge.ReadableArray import com.swmansion.rnexecutorch.models.imagesegmentation.ImageSegmentationModel +import com.swmansion.rnexecutorch.utils.ETError import com.swmansion.rnexecutorch.utils.ImageProcessor import org.opencv.android.OpenCVLoader -class ImageSegmentation(reactContext: ReactApplicationContext) : - NativeImageSegmentationSpec(reactContext) { - +class ImageSegmentation( + reactContext: ReactApplicationContext, +) : NativeImageSegmentationSpec(reactContext) { private lateinit var model: ImageSegmentationModel companion object { const val NAME = "ImageSegmentation" init { - if(!OpenCVLoader.initLocal()){ + if (!OpenCVLoader.initLocal()) { Log.d("rn_executorch", "OpenCV not loaded") } else { Log.d("rn_executorch", "OpenCV loaded") @@ -27,7 +26,10 @@ class ImageSegmentation(reactContext: ReactApplicationContext) : } } - override fun loadModule(modelSource: String, promise: Promise) { + override fun loadModule( + modelSource: String, + promise: Promise, + ) { try { model = ImageSegmentationModel(reactApplicationContext) model.loadModel(modelSource) @@ -37,20 +39,20 @@ class ImageSegmentation(reactContext: ReactApplicationContext) : } } - override fun forward(input: String, - classesOfInterest: ReadableArray, - resize:Boolean, - promise: Promise) { + override fun forward( + input: String, + classesOfInterest: ReadableArray, + resize: Boolean, + promise: Promise, + ) { try { val output = - model.runModel(Pair(ImageProcessor.readImage(input), classesOfInterest)) + model.runModel(Triple(ImageProcessor.readImage(input), classesOfInterest, resize)) promise.resolve(output) - }catch(e: Exception){ + } catch (e: Exception) { promise.reject(e.message!!, e.message) } } - override fun getName(): String { - return NAME - } + override fun getName(): String = NAME } diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt index 70c657f9..a8deb889 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt @@ -1,22 +1,22 @@ package com.swmansion.rnexecutorch.models.imagesegmentation -import com.facebook.react.bridge.Arguments; +import com.facebook.react.bridge.Arguments +import com.facebook.react.bridge.ReactApplicationContext import com.facebook.react.bridge.ReadableArray import com.facebook.react.bridge.WritableMap -import com.facebook.react.bridge.ReactApplicationContext +import com.swmansion.rnexecutorch.models.BaseModel +import com.swmansion.rnexecutorch.utils.ArrayUtils import com.swmansion.rnexecutorch.utils.ImageProcessor import com.swmansion.rnexecutorch.utils.softmax -import org.opencv.core.Mat import org.opencv.core.CvType +import org.opencv.core.Mat import org.opencv.core.Size import org.opencv.imgproc.Imgproc -import org.pytorch.executorch.Tensor import org.pytorch.executorch.EValue -import com.swmansion.rnexecutorch.models.BaseModel -import com.swmansion.rnexecutorch.utils.ArrayUtils -class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) - : BaseModel , WritableMap>(reactApplicationContext) { +class ImageSegmentationModel( + reactApplicationContext: ReactApplicationContext, +) : BaseModel, WritableMap>(reactApplicationContext) { private lateinit var originalSize: Size private fun getModelImageSize(): Size { @@ -27,44 +27,53 @@ class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) return Size(height.toDouble(), width.toDouble()) } - override fun preprocess(input: Pair): EValue { - originalSize = input.first.size() - Imgproc.resize(input.first, input.first, getModelImageSize()) - return ImageProcessor.matToEValue(input.first, module.getInputShape(0)) + fun preprocess(input: Mat): EValue { + originalSize = input.size() + Imgproc.resize(input, input, getModelImageSize()) + return ImageProcessor.matToEValue(input, module.getInputShape(0)) } - private fun rescaleResults(result: Array, numLabels: Int) - : List { - val modelShape = getModelImageSize() - val numModelPixels = (modelShape.height * modelShape.width).toInt() + private fun extractResults( + result: Array, + numLabels: Int, + resize: Boolean, + ): List { + val modelSize = getModelImageSize() + val numModelPixels = (modelSize.height * modelSize.width).toInt() - val resizedLabelScores = mutableListOf() + val extractedLabelScores = mutableListOf() for (label in 0.., numLabels: Int) - : Mat { - val argMax = Mat(originalSize, CvType.CV_32S) - val numOriginalPixels = (originalSize.height * originalSize.width).toInt() - android.util.Log.d("ETTT", "adjustScoresPerPixel: start") - for (pixel in 0.., + numLabels: Int, + outputSize: Size, + ): Mat { + val argMax = Mat(outputSize, CvType.CV_32S) + val numPixels = (outputSize.height * outputSize.width).toInt() + for (pixel in 0..() for (mat in labelScores) { scores.add(mat.get(row, col)[0].toFloat()) @@ -74,59 +83,71 @@ class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) labelScores[label].put(row, col, floatArrayOf(adjustedScores[label])) } - val maxIndex = scores.withIndex().maxBy{it.value}.index + val maxIndex = scores.withIndex().maxBy { it.value }.index argMax.put(row, col, intArrayOf(maxIndex)) } return argMax } - override fun postprocess(output: Array) - : WritableMap { + fun postprocess( + output: Array, + classesOfInterest: ReadableArray, + resize: Boolean, + ): WritableMap { val output = output[0].toTensor().dataAsFloatArray.toTypedArray() - val modelShape = getModelImageSize() - val numLabels = deeplabv3_resnet50_labels.size; - val numOriginalPixels = (originalSize.height * originalSize.width).toInt() + val modelSize = getModelImageSize() + val numLabels = deeplabv3_resnet50_labels.size - require(output.count() == (numLabels * modelShape.height * modelShape.width).toInt()) - {"Model generated unexpected output size."} + require(output.count() == (numLabels * modelSize.height * modelSize.width).toInt()) { "Model generated unexpected output size." } - val rescaledResults = rescaleResults(output, numLabels) + val outputSize = if (resize) originalSize else modelSize + val numOutputPixels = (outputSize.height * outputSize.width).toInt() - val argMax = adjustScoresPerPixel(rescaledResults, numLabels) + val extractedResults = extractResults(output, numLabels, resize) - // val labelSet = mutableSetOf() + val argMax = adjustScoresPerPixel(extractedResults, numLabels, outputSize) + + val labelSet = mutableSetOf() // Filter by the label set when base class changed + for (i in 0..) - : WritableMap { - val modelInput = preprocess(input) + override fun runModel(input: Triple): WritableMap { + val modelInput = preprocess(input.first) val modelOutput = forward(modelInput) - return postprocess(modelOutput) + return postprocess(modelOutput, input.second, input.third) } } From c040473136d1af91b8fbd605f0ba90f4d49cdc86 Mon Sep 17 00:00:00 2001 From: Jakub Gonera Date: Wed, 12 Mar 2025 14:39:01 +0100 Subject: [PATCH 15/16] Remove unnecessary calls to openCV in Android --- .../ImageSegmentationModel.kt | 62 +++++++------------ 1 file changed, 24 insertions(+), 38 deletions(-) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt index a8deb889..2b86bd55 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt @@ -34,57 +34,56 @@ class ImageSegmentationModel( } private fun extractResults( - result: Array, + result: FloatArray, numLabels: Int, resize: Boolean, - ): List { + ): List { val modelSize = getModelImageSize() val numModelPixels = (modelSize.height * modelSize.width).toInt() - val extractedLabelScores = mutableListOf() + val extractedLabelScores = mutableListOf() for (label in 0.., + labelScores: List, numLabels: Int, outputSize: Size, - ): Mat { - val argMax = Mat(outputSize, CvType.CV_32S) + ): IntArray { val numPixels = (outputSize.height * outputSize.width).toInt() + val argMax = IntArray(numPixels) for (pixel in 0..() - for (mat in labelScores) { - scores.add(mat.get(row, col)[0].toFloat()) + for (buffer in labelScores) { + scores.add(buffer[pixel]) } val adjustedScores = softmax(scores.toTypedArray()) for (label in 0.. Date: Wed, 12 Mar 2025 15:38:13 +0100 Subject: [PATCH 16/16] Change segmentation labels to uppercase on Android --- .../models/imageSegmentation/Constants.kt | 30 +++++++++++++++---- .../ImageSegmentationModel.kt | 8 ++--- 2 files changed, 28 insertions(+), 10 deletions(-) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt index e1f127b1..7ba7fcb5 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt @@ -1,8 +1,26 @@ package com.swmansion.rnexecutorch.models.imagesegmentation -val deeplabv3_resnet50_labels: Array = arrayOf( - "background", "aeroplane", "bicycle", "bird", "boat", - "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", - "dog", "horse", "motorbike", "person", "pottedplant", "sheep", - "sofa", "train", "tvmonitor" -) \ No newline at end of file +val deeplabv3_resnet50_labels: Array = + arrayOf( + "BACKGROUND", + "AEROPLANE", + "BICYCLE", + "BIRD", + "BOAT", + "BOTTLE", + "BUS", + "CAR", + "CAT", + "CHAIR", + "COW", + "DININGTABLE", + "DOG", + "HORSE", + "MOTORBIKE", + "PERSON", + "POTTEDPLANT", + "SHEEP", + "SOFA", + "TRAIN", + "TVMONITOR", + ) diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt index 2b86bd55..36c1594b 100644 --- a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt +++ b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt @@ -94,15 +94,15 @@ class ImageSegmentationModel( classesOfInterest: ReadableArray, resize: Boolean, ): WritableMap { - val output = output[0].toTensor().dataAsFloatArray + val outputData = output[0].toTensor().dataAsFloatArray val modelSize = getModelImageSize() val numLabels = deeplabv3_resnet50_labels.size - require(output.count() == (numLabels * modelSize.height * modelSize.width).toInt()) { "Model generated unexpected output size." } + require(outputData.count() == (numLabels * modelSize.height * modelSize.width).toInt()) { "Model generated unexpected output size." } val outputSize = if (resize) originalSize else modelSize - val extractedResults = extractResults(output, numLabels, resize) + val extractedResults = extractResults(outputData, numLabels, resize) val argMax = adjustScoresPerPixel(extractedResults, numLabels, outputSize) @@ -124,7 +124,7 @@ class ImageSegmentationModel( } res.putArray( - "argmax", + "ARGMAX", ArrayUtils.createReadableArrayFromIntArray(argMax), )