Add image segmentation model logic

JakubGonera · JakubGonera · commit 899e440509f3 · 2025-03-12T15:49:36.000+01:00
diff --git a/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt b/android/src/main/java/com/swmansion/rnexecutorch/ImageSegmentation.kt
@@ -0,0 +1,52 @@
+package com.swmansion.rnexecutorch
+
+import android.util.Log
+import com.facebook.react.bridge.Promise
+import com.facebook.react.bridge.ReadableArray
+import com.facebook.react.bridge.ReactApplicationContext
+import com.swmansion.rnexecutorch.utils.ETError
+import com.swmansion.rnexecutorch.models.imagesegmentation.ImageSegmentationModel
+import com.swmansion.rnexecutorch.utils.ImageProcessor
+import org.opencv.android.OpenCVLoader
+
+/**
+ * React Native module that exposes [ImageSegmentationModel] to JavaScript under
+ * the name [NAME].
+ *
+ * Every call reports its outcome through the supplied [Promise]; exceptions are
+ * never allowed to escape to the caller.
+ */
+class ImageSegmentation(reactContext: ReactApplicationContext) :
+  NativeImageSegmentationSpec(reactContext) {
+
+  // Assigned by loadModule(); forward() before that fails with an exception that
+  // is caught below and turned into a promise rejection.
+  private lateinit var model: ImageSegmentationModel
+
+  companion object {
+    const val NAME = "ImageSegmentation"
+
+    init {
+      // Runs once when the class is first loaded; only logs the outcome.
+      if (!OpenCVLoader.initLocal()) {
+        Log.d("rn_executorch", "OpenCV not loaded")
+      } else {
+        Log.d("rn_executorch", "OpenCV loaded")
+      }
+    }
+  }
+
+  /**
+   * Creates a fresh [ImageSegmentationModel] and loads it from [modelSource].
+   *
+   * Resolves [promise] with `0` on success. On failure rejects it with the
+   * exception text as the first argument and `ETError.InvalidModelSource` as
+   * the second.
+   */
+  override fun loadModule(modelSource: String, promise: Promise) {
+    try {
+      model = ImageSegmentationModel(reactApplicationContext)
+      model.loadModel(modelSource)
+      promise.resolve(0)
+    } catch (e: Exception) {
+      // Exception.message may be null; `!!` here would throw inside the handler
+      // and leave the promise unsettled.
+      promise.reject(e.message ?: e.toString(), ETError.InvalidModelSource.toString())
+    }
+  }
+
+  /**
+   * Reads the image referenced by [input], runs the loaded model on it together
+   * with [classesOfInterest], and resolves [promise] with the model's result.
+   *
+   * Any exception rejects [promise] with the exception text as both arguments.
+   */
+  override fun forward(input: String, classesOfInterest: ReadableArray, promise: Promise) {
+    try {
+      val output =
+          model.runModel(Pair(ImageProcessor.readImage(input), classesOfInterest))
+      promise.resolve(output)
+    } catch (e: Exception) {
+      // Same null-safety concern as in loadModule().
+      val reason = e.message ?: e.toString()
+      promise.reject(reason, reason)
+    }
+  }
+
+  /** Returns the name this module is registered under. */
+  override fun getName(): String {
+    return NAME
+  }
+}
diff --git a/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt b/android/src/main/java/com/swmansion/rnexecutorch/RnExecutorchPackage.kt
@@ -30,6 +30,8 @@ class RnExecutorchPackage : TurboReactPackage() {
       OCR(reactContext)
     } else if (name == VerticalOCR.NAME) {
       VerticalOCR(reactContext)
+    } else if (name == ImageSegmentation.NAME) {
+      ImageSegmentation(reactContext)
     } else {
       null
     }
@@ -115,6 +117,13 @@ class RnExecutorchPackage : TurboReactPackage() {
           false, // isCxxModule
           true,
         )
+
+      moduleInfos[ImageSegmentation.NAME] = ReactModuleInfo(
+        ImageSegmentation.NAME, ImageSegmentation.NAME, false,  // canOverrideExistingModule
+        false,  // needsEagerInit
+        false,  // isCxxModule
+        true
+      )
       moduleInfos
     }
 }
diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/classification/ClassificationModel.kt
@@ -3,6 +3,7 @@ package com.swmansion.rnexecutorch.models.classification
 import com.facebook.react.bridge.ReactApplicationContext
 import com.swmansion.rnexecutorch.models.BaseModel
 import com.swmansion.rnexecutorch.utils.ImageProcessor
+import com.swmansion.rnexecutorch.utils.softmax
 import org.opencv.core.Mat
 import org.opencv.core.Size
 import org.opencv.imgproc.Imgproc
diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/Constants.kt
@@ -0,0 +1,8 @@
+package com.swmansion.rnexecutorch.models.imagesegmentation
+
+/**
+ * Class names for the segmentation model's output, in index order: entry `i`
+ * names the `i`-th score plane produced by the model.
+ */
+val deeplabv3_resnet50_labels: Array<String> =
+    arrayOf(
+        "background",
+        "aeroplane",
+        "bicycle",
+        "bird",
+        "boat",
+        "bottle",
+        "bus",
+        "car",
+        "cat",
+        "chair",
+        "cow",
+        "diningtable",
+        "dog",
+        "horse",
+        "motorbike",
+        "person",
+        "pottedplant",
+        "sheep",
+        "sofa",
+        "train",
+        "tvmonitor",
+    )
diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/imageSegmentation/ImageSegmentationModel.kt
@@ -0,0 +1,120 @@
+package com.swmansion.rnexecutorch.models.imagesegmentation
+
+import com.facebook.react.bridge.ReadableArray
+import com.facebook.react.bridge.ReactApplicationContext
+import com.swmansion.rnexecutorch.utils.ImageProcessor
+import com.swmansion.rnexecutorch.utils.softmax
+import org.opencv.core.Mat
+import org.opencv.core.CvType
+import org.opencv.core.Size
+import org.opencv.imgproc.Imgproc
+import org.pytorch.executorch.Tensor
+import org.pytorch.executorch.EValue
+import com.swmansion.rnexecutorch.models.BaseModel
+
+/**
+ * Segmentation model wrapper: resizes the input image to the model's input
+ * size, runs inference, and converts the raw per-label score planes into
+ * per-pixel softmax scores plus an argmax map at the original image size.
+ *
+ * The input is a pair of (image, classes of interest); the second element is
+ * currently unused. The result maps every entry of [deeplabv3_resnet50_labels]
+ * to a row-major list of per-pixel scores, and `"argmax"` to a row-major list
+ * of winning label indices.
+ *
+ * [preprocess] stores the original image size that [postprocess] later reads,
+ * so the two must be called as a pair for the same image (as [runModel] does).
+ */
+class ImageSegmentationModel(reactApplicationContext: ReactApplicationContext) :
+  BaseModel<Pair<Mat, ReadableArray>, Map<String, List<Any>>>(reactApplicationContext) {
+  // Size of the most recently preprocessed image, captured before resizing.
+  private lateinit var originalSize: Size
+
+  /**
+   * Returns the spatial size expected by the model's first input.
+   *
+   * Assumes the last two dimensions of the input shape are (height, width).
+   */
+  private fun getModelImageSize(): Size {
+    val inputShape = module.getInputShape(0)
+    val width = inputShape[inputShape.lastIndex]
+    val height = inputShape[inputShape.lastIndex - 1]
+
+    // OpenCV's Size constructor takes (width, height), in that order.
+    return Size(width.toDouble(), height.toDouble())
+  }
+
+  /**
+   * Remembers the image's original size, resizes the image in place to the
+   * model input size, and converts it to the model's input value.
+   */
+  override fun preprocess(input: Pair<Mat, ReadableArray>): EValue {
+    val image = input.first
+    originalSize = image.size()
+    Imgproc.resize(image, image, getModelImageSize())
+    return ImageProcessor.matToEValue(image, module.getInputShape(0))
+  }
+
+  /**
+   * Splits [result] into [numLabels] consecutive score planes of model-input
+   * size and resizes each one to [originalSize].
+   *
+   * @return one row-major array of `originalSize` pixels per label.
+   */
+  private fun rescaleResults(result: FloatArray, numLabels: Int): List<FloatArray> {
+    val modelShape = getModelImageSize()
+    val numModelPixels = (modelShape.height * modelShape.width).toInt()
+    val numOriginalPixels = (originalSize.height * originalSize.width).toInt()
+
+    return List(numLabels) { label ->
+      val modelPlane = Mat(modelShape, CvType.CV_32F)
+      val resizedPlane = Mat()
+      try {
+        // Copy the whole plane in one call instead of one call per pixel.
+        val start = label * numModelPixels
+        modelPlane.put(0, 0, result.copyOfRange(start, start + numModelPixels))
+        Imgproc.resize(modelPlane, resizedPlane, originalSize)
+        FloatArray(numOriginalPixels).also { resizedPlane.get(0, 0, it) }
+      } finally {
+        // Free the native buffers right away; only the copied array is kept.
+        modelPlane.release()
+        resizedPlane.release()
+      }
+    }
+  }
+
+  /**
+   * Replaces, in place, the scores of every pixel across [labelScores] with
+   * their softmax, and returns the index of the highest-scoring label per pixel.
+   *
+   * All arrays in [labelScores] are expected to have the same length.
+   */
+  private fun adjustScoresPerPixel(labelScores: List<FloatArray>): IntArray {
+    val numPixels = labelScores.firstOrNull()?.size ?: 0
+    val argMax = IntArray(numPixels)
+
+    for (pixel in 0..<numPixels) {
+      val scores = Array(labelScores.size) { label -> labelScores[label][pixel] }
+
+      // Pick the winner from the raw scores before they are handed to softmax.
+      argMax[pixel] = scores.withIndex().maxBy { it.value }.index
+
+      val adjustedScores = softmax(scores)
+      for (label in labelScores.indices) {
+        labelScores[label][pixel] = adjustedScores[label]
+      }
+    }
+
+    return argMax
+  }
+
+  /**
+   * Converts the model's first output tensor into the result map described in
+   * the class documentation.
+   *
+   * @throws IllegalArgumentException if the tensor does not hold exactly
+   *   `labels x model height x model width` values.
+   */
+  override fun postprocess(output: Array<EValue>): Map<String, List<Any>> {
+    val rawScores = output[0].toTensor().dataAsFloatArray
+    val modelShape = getModelImageSize()
+    val numLabels = deeplabv3_resnet50_labels.size
+
+    require(rawScores.size == (numLabels * modelShape.height * modelShape.width).toInt()) {
+      "Model generated unexpected output size."
+    }
+
+    val labelScores = rescaleResults(rawScores, numLabels)
+    val argMax = adjustScoresPerPixel(labelScores)
+
+    // val labelSet = mutableSetOf<String>()
+    // Filter by the label set when base class changed
+
+    val res = mutableMapOf<String, List<Any>>()
+    for (label in 0..<numLabels) {
+      res[deeplabv3_resnet50_labels[label]] = labelScores[label].toList()
+    }
+    res["argmax"] = argMax.toList()
+
+    return res
+  }
+
+  /** Runs preprocessing, inference and postprocessing for a single image. */
+  override fun runModel(input: Pair<Mat, ReadableArray>): Map<String, List<Any>> {
+    val modelInput = preprocess(input)
+    val modelOutput = forward(modelInput)
+    return postprocess(modelOutput)
+  }
+}
diff --git a/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt b/android/src/main/java/com/swmansion/rnexecutorch/models/styleTransfer/StyleTransferModel.kt
@@ -6,6 +6,7 @@ import org.opencv.core.Mat
 import org.opencv.core.Size
 import org.opencv.imgproc.Imgproc
 import org.pytorch.executorch.EValue
+import com.swmansion.rnexecutorch.models.BaseModel
 
 class StyleTransferModel(
   reactApplicationContext: ReactApplicationContext,

Original file line number	Diff line number	Diff line change
`@@ -30,6 +30,8 @@ class RnExecutorchPackage : TurboReactPackage() {`
`30`	`30`	`OCR(reactContext)`
`31`	`31`	`} else if (name == VerticalOCR.NAME) {`
`32`	`32`	`VerticalOCR(reactContext)`
	`33`	`+ } else if (name == ImageSegmentation.NAME) {`
	`34`	`+ ImageSegmentation(reactContext)`
`33`	`35`	`} else {`
`34`	`36`	`null`
`35`	`37`	`}`
`@@ -115,6 +117,13 @@ class RnExecutorchPackage : TurboReactPackage() {`
`115`	`117`	`false, // isCxxModule`
`116`	`118`	`true,`
`117`	`119`	`)`
	`120`	`+`
	`121`	`+ moduleInfos[ImageSegmentation.NAME] = ReactModuleInfo(`
	`122`	`+ ImageSegmentation.NAME, ImageSegmentation.NAME, false, // canOverrideExistingModule`
	`123`	`+ false, // needsEagerInit`
	`124`	`+ false, // isCxxModule`
	`125`	`+ true`
	`126`	`+ )`
`118`	`127`	`moduleInfos`
`119`	`128`	`}`
`120`	`129`	`}`