Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implemented vertical ocr #109

Merged
merged 12 commits into the base branch from the feature branch
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,7 @@ class VerticalDetector(
Size(modelImageSize.width / 2, modelImageSize.height / 2)
)

var txtThreshold = Constants.TEXT_THRESHOLD

if (!detectSingleCharacter) {
txtThreshold = Constants.TEXT_THRESHOLD_VERTICAL
}

val txtThreshold = if (detectSingleCharacter) Constants.TEXT_THRESHOLD else Constants.TEXT_THRESHOLD_VERTICAL
var bBoxesList = DetectorUtils.getDetBoxesFromTextMapVertical(
scoreText,
scoreLink,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,9 +332,10 @@ class DetectorUtils {
val detectedBoxes = mutableListOf<OCRbBox>()
for (i in 1 until nLabels) {
val area = stats.get(i, Imgproc.CC_STAT_AREA)[0].toInt()
if (area < 20) continue

val height = stats.get(i, Imgproc.CC_STAT_HEIGHT)[0].toInt()
val width = stats.get(i, Imgproc.CC_STAT_WIDTH)[0].toInt()
if (area < 20) continue

if (!independentCharacters && height < width) continue
val mask = createMaskFromLabels(labels, i)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,16 +256,12 @@ class RecognizerUtils {
img = adjustContrastGrey(img, adjustContrast)
}

var desiredWidth =
if (isVertical) Constants.VERTICAL_SMALL_MODEL_WIDTH else Constants.SMALL_MODEL_WIDTH

if (img.width() >= Constants.LARGE_MODEL_WIDTH) {
desiredWidth = Constants.LARGE_MODEL_WIDTH
} else if (img.width() >= Constants.MEDIUM_MODEL_WIDTH) {
desiredWidth = Constants.MEDIUM_MODEL_WIDTH
val desiredWidth =when {
img.width() >= Constants.LARGE_MODEL_WIDTH -> Constants.LARGE_MODEL_WIDTH
img.width() >= Constants.MEDIUM_MODEL_WIDTH -> Constants.MEDIUM_MODEL_WIDTH
else -> if (isVertical) Constants.VERTICAL_SMALL_MODEL_WIDTH else Constants.SMALL_MODEL_WIDTH
}


img = ImageProcessor.resizeWithPadding(img, desiredWidth, Constants.MODEL_HEIGHT)
img.convertTo(img, CvType.CV_32F, 1.0 / 255.0)
Core.subtract(img, Scalar(0.5), img)
Expand Down
2 changes: 0 additions & 2 deletions ios/RnExecutorch/VerticalOCR.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#import <RnExecutorchSpec/RnExecutorchSpec.h>

constexpr CGFloat recognizerRatio = 1.6;

@interface VerticalOCR : NSObject <NativeVerticalOCRSpec>

@end
18 changes: 1 addition & 17 deletions ios/RnExecutorch/models/ocr/Detector.h
Original file line number Diff line number Diff line change
@@ -1,23 +1,7 @@
#import "BaseModel.h"
#import "RecognitionHandler.h"
#import "opencv2/opencv.hpp"

constexpr CGFloat textThreshold = 0.4;
constexpr CGFloat textThresholdVertical = 0.3;
constexpr CGFloat linkThreshold = 0.4;
constexpr CGFloat lowTextThreshold = 0.7;
constexpr CGFloat centerThreshold = 0.5;
constexpr CGFloat distanceThreshold = 2.0;
constexpr CGFloat heightThreshold = 2.0;
constexpr CGFloat restoreRatio = 3.2;
constexpr CGFloat restoreRatioVertical = 2.0;
constexpr int minSideThreshold = 15;
constexpr int maxSideThreshold = 30;
constexpr int maxWidth = largeModelWidth + (largeModelWidth * 0.15);
constexpr int minSize = 20;

const cv::Scalar mean(0.485, 0.456, 0.406);
const cv::Scalar variance(0.229, 0.224, 0.225);
#import "utils/Constants.h"

@interface Detector : BaseModel

Expand Down
18 changes: 1 addition & 17 deletions ios/RnExecutorch/models/ocr/VerticalDetector.h
Original file line number Diff line number Diff line change
@@ -1,23 +1,7 @@
#import "BaseModel.h"
#import "RecognitionHandler.h"
#import "opencv2/opencv.hpp"

constexpr CGFloat textThreshold = 0.4;
constexpr CGFloat textThresholdVertical = 0.3;
constexpr CGFloat linkThreshold = 0.4;
constexpr CGFloat lowTextThreshold = 0.7;
constexpr CGFloat centerThreshold = 0.5;
constexpr CGFloat distanceThreshold = 2.0;
constexpr CGFloat heightThreshold = 2.0;
constexpr CGFloat restoreRatio = 3.2;
constexpr CGFloat restoreRatioVertical = 2.0;
constexpr int minSideThreshold = 15;
constexpr int maxSideThreshold = 30;
constexpr int maxWidth = largeModelWidth + (largeModelWidth * 0.15);
constexpr int minSize = 20;

const cv::Scalar mean(0.485, 0.456, 0.406);
const cv::Scalar variance(0.229, 0.224, 0.225);
#import "utils/Constants.h"

@interface VerticalDetector : BaseModel

Expand Down
7 changes: 3 additions & 4 deletions ios/RnExecutorch/models/ocr/VerticalDetector.mm
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,9 @@ group each character into a single instance (sequence) Both matrices are
outputMat2:scoreAffinityCV
withSize:cv::Size(modelImageSize.width / 2,
modelImageSize.height / 2)];
CGFloat txtThreshold = textThreshold;
if (!self->detectSingleCharacters) {
txtThreshold = textThresholdVertical;
}
CGFloat txtThreshold = (self->detectSingleCharacters) ? textThreshold
: textThresholdVertical;

NSArray *bBoxesList = [DetectorUtils
getDetBoxesFromTextMapVertical:scoreTextCV
affinityMap:scoreAffinityCV
Expand Down
17 changes: 17 additions & 0 deletions ios/RnExecutorch/models/ocr/utils/Constants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#pragma once
// Shared OCR tuning constants, extracted so Detector.h and VerticalDetector.h
// no longer carry duplicate copies. Values must stay in sync with the Kotlin
// Constants used by the Android implementation.
//
// NOTE(review): this header is not self-contained — it assumes CGFloat
// (CoreGraphics), cv::Scalar (opencv2), and `largeModelWidth` are already in
// scope at every include site. Confirm all includers provide them, or add the
// corresponding includes here.

// Score-map binarization thresholds for text detection.
constexpr CGFloat textThreshold = 0.4;
constexpr CGFloat textThresholdVertical = 0.3;
constexpr CGFloat linkThreshold = 0.4;
constexpr CGFloat lowTextThreshold = 0.7;
// Box grouping/merging thresholds.
constexpr CGFloat centerThreshold = 0.5;
constexpr CGFloat distanceThreshold = 2.0;
constexpr CGFloat heightThreshold = 2.0;
// Ratios used to restore detected boxes to original image coordinates.
constexpr CGFloat restoreRatio = 3.2;
constexpr CGFloat restoreRatioVertical = 2.0;
// Pixel-size limits for accepted detections.
constexpr int minSideThreshold = 15;
constexpr int maxSideThreshold = 30;
// Maximum accepted box width: model width plus a 15% margin.
constexpr int maxWidth = largeModelWidth + (largeModelWidth * 0.15);
constexpr int minSize = 20;

// Per-channel normalization applied to model input (ImageNet mean/std).
// `const` at namespace scope has internal linkage, so each translation unit
// gets its own copy — no ODR violation, but the guard above prevents
// redefinition within a single TU.
const cv::Scalar mean(0.485, 0.456, 0.406);
const cv::Scalar variance(0.229, 0.224, 0.225);

4 changes: 2 additions & 2 deletions ios/RnExecutorch/models/ocr/utils/DetectorUtils.mm
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,10 @@ + (NSArray *)getDetBoxesFromTextMapVertical:(cv::Mat)textMap
NSMutableArray *detectedBoxes = [NSMutableArray array];
for (int i = 1; i < nLabels; i++) {
const int area = stats.at<int>(i, cv::CC_STAT_AREA);
const int width = stats.at<int>(i, cv::CC_STAT_WIDTH);
const int height = stats.at<int>(i, cv::CC_STAT_HEIGHT);
if (area < 20)
continue;
const int width = stats.at<int>(i, cv::CC_STAT_WIDTH);
const int height = stats.at<int>(i, cv::CC_STAT_HEIGHT);

if (!independentCharacters && height < width)
continue;
Expand Down
1 change: 1 addition & 0 deletions ios/RnExecutorch/models/ocr/utils/OCRUtils.mm
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ @implementation OCRUtils

+ (cv::Rect)extractBoundingBox:(NSArray *)coords {
std::vector<cv::Point2f> points;
points.reserve(coords.count);
for (NSValue *value in coords) {
const CGPoint point = [value CGPointValue];

Expand Down
11 changes: 4 additions & 7 deletions ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,8 @@ + (CGFloat)calculateRatio:(int)width height:(int)height {
image = [self adjustContrastGrey:image target:adjustContrast];
}

int desiredWidth;
if (isVertical){
desiredWidth = 64;
}else{
desiredWidth = 128;
}
int desiredWidth = (isVertical) ? 64 : 128;

if (image.cols >= 512) {
desiredWidth = 512;
} else if (image.cols >= 256) {
Expand Down Expand Up @@ -233,6 +229,7 @@ + (double)computeConfidenceScore:(NSArray<NSNumber *> *)valuesArray
originalPaddings:(NSDictionary *)originalPaddings {
CGPoint topLeft = [originalBbox[0] CGPointValue];
std::vector<cv::Point2f> points;
points.reserve(bbox.count);
for (NSValue *coords in bbox) {
CGPoint point = [coords CGPointValue];

Expand All @@ -251,7 +248,7 @@ + (double)computeConfidenceScore:(NSArray<NSNumber *> *)valuesArray
point.x = point.x * [originalPaddings[@"resizeRatio"] floatValue];
point.y = point.y * [originalPaddings[@"resizeRatio"] floatValue];

points.push_back(cv::Point2f(point.x, point.y));
points.emplace_back(cv::Point2f(point.x, point.y));
}

cv::Rect rect = cv::boundingRect(points);
Expand Down
4 changes: 0 additions & 4 deletions src/constants/ocr/languageDicts.ts

This file was deleted.

3 changes: 1 addition & 2 deletions src/hooks/computer_vision/useOCR.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { useEffect, useState } from 'react';
import { fetchResource } from '../../utils/fetchResource';
import { languageDicts } from '../../constants/ocr/languageDicts';
import { symbols } from '../../constants/ocr/symbols';
import { getError, ETError } from '../../Error';
import { OCR } from '../../native/RnExecutorchModules';
Expand Down Expand Up @@ -45,7 +44,7 @@ export const useOCR = ({
recognizerSmall: string;
};

if (!symbols[language] || !languageDicts[language]) {
if (!symbols[language]) {
setError(getError(ETError.LanguageNotSupported));
return;
}
Expand Down
41 changes: 14 additions & 27 deletions src/hooks/computer_vision/useVerticalOCR.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { useEffect, useState } from 'react';
import { fetchResource } from '../../utils/fetchResource';
import { languageDicts } from '../../constants/ocr/languageDicts';
import { symbols } from '../../constants/ocr/symbols';
import { getError, ETError } from '../../Error';
import { VerticalOCR } from '../../native/RnExecutorchModules';
Expand Down Expand Up @@ -46,37 +45,25 @@ export const useVerticalOCR = ({
)
return;

let recognizerPath;

const detectorPaths = {} as {
detectorLarge: string;
detectorNarrow: string;
};

if (!symbols[language] || !languageDicts[language]) {
if (!symbols[language]) {
setError(getError(ETError.LanguageNotSupported));
return;
}

await Promise.all([
fetchResource(detectorSources.detectorLarge),
fetchResource(detectorSources.detectorNarrow),
]).then((values) => {
detectorPaths.detectorLarge = values[0];
detectorPaths.detectorNarrow = values[1];
});
const recognizerPath = independentCharacters
? await fetchResource(
recognizerSources.recognizerSmall,
setDownloadProgress
)
: await fetchResource(
recognizerSources.recognizerLarge,
setDownloadProgress
);

if (independentCharacters) {
recognizerPath = await fetchResource(
recognizerSources.recognizerSmall,
setDownloadProgress
);
} else {
recognizerPath = await fetchResource(
recognizerSources.recognizerLarge,
setDownloadProgress
);
}
const detectorPaths = {
detectorLarge: await fetchResource(detectorSources.detectorLarge),
detectorNarrow: await fetchResource(detectorSources.detectorNarrow),
};

setIsReady(false);
await VerticalOCR.loadModule(
Expand Down
3 changes: 1 addition & 2 deletions src/modules/computer_vision/OCRModule.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import { languageDicts } from '../../constants/ocr/languageDicts';
import { symbols } from '../../constants/ocr/symbols';
import { getError, ETError } from '../../Error';
import { OCR } from '../../native/RnExecutorchModules';
Expand Down Expand Up @@ -27,7 +26,7 @@ export class OCRModule {
recognizerSmall: string;
};

if (!symbols[language] || !languageDicts[language]) {
if (!symbols[language]) {
throw new Error(getError(ETError.LanguageNotSupported));
}

Expand Down
Loading