Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: implemented vertical ocr #109

Merged
merged 12 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ class VerticalOCR(reactContext: ReactApplicationContext) :
)

if (this.independentCharacters) {
croppedCharacter = RecognizerUtils.cropSingleCharacter(croppedCharacter)
croppedCharacter = RecognizerUtils.normalizeForRecognizer(croppedCharacter, 0.0, true)
val recognitionResult = recognizer.runModel(croppedCharacter)
val predIndex = recognitionResult.first
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import com.swmansion.rnexecutorch.utils.ImageProcessor
import org.opencv.core.Core
import org.opencv.core.CvType
import org.opencv.core.Mat
import org.opencv.core.MatOfFloat
import org.opencv.core.MatOfInt
import org.opencv.core.MatOfPoint2f
import org.opencv.core.Point
import org.opencv.core.Rect
Expand Down Expand Up @@ -320,5 +322,70 @@ class RecognizerUtils {

return boundingBox
}

/**
 * Keeps only the largest, centrally located connected component of [img] and
 * returns it as a bitwise-inverted image of the same size and type.
 *
 * Steps:
 * 1. Build a 256-bin histogram of channel 0 and compare the pixel counts of the
 *    lower half (bins 0..127) and the upper half (bins 128..255) to choose the
 *    threshold polarity: `THRESH_BINARY_INV` when the upper half holds more
 *    pixels, `THRESH_BINARY` otherwise.
 * 2. Binarize with Otsu's threshold and label 8-connected components.
 * 3. Among the non-background labels, pick the component with the largest area
 *    whose centroid lies strictly inside the central region (30% margin on each
 *    side) and whose area exceeds 70 pixels.
 * 4. Copy the pixels of [img] covered by that component onto a zero image and
 *    invert the result. When no component qualifies the mask stays empty, so the
 *    inverted all-zero image is returned.
 *
 * NOTE(review): the histogram range and Otsu thresholding suggest [img] is expected
 * to be a single-channel 8-bit image; confirm against the caller.
 *
 * @param img source image; it is read but not modified.
 * @return a newly allocated [Mat]; the caller owns it and is responsible for releasing it.
 */
fun cropSingleCharacter(img: Mat): Mat {
    // Every intermediate Mat owns native memory, so collect them here and free them
    // deterministically in `finally` rather than waiting for finalization.
    val temporaries = mutableListOf<Mat>()
    fun <T : Mat> T.tracked(): T = also { temporaries.add(it) }

    try {
        val binCount = 256
        val histogram = Mat().tracked()
        Imgproc.calcHist(
            listOf(img),
            MatOfInt(0).tracked(),
            Mat().tracked(),
            histogram,
            MatOfInt(binCount).tracked(),
            MatOfFloat(0f, 256f).tracked()
        )

        // Compare how many pixels fall into the lower and the upper half of the histogram.
        val midPoint = binCount / 2
        var sumLeft = 0.0
        var sumRight = 0.0
        for (bin in 0 until binCount) {
            val count = histogram.get(bin, 0)[0]
            if (bin < midPoint) sumLeft += count else sumRight += count
        }

        val thresholdType = if (sumLeft < sumRight) Imgproc.THRESH_BINARY_INV else Imgproc.THRESH_BINARY

        val thresh = Mat().tracked()
        // The threshold value (0.0) is ignored because THRESH_OTSU computes it.
        Imgproc.threshold(img, thresh, 0.0, 255.0, thresholdType or Imgproc.THRESH_OTSU)

        val labels = Mat().tracked()
        val stats = Mat().tracked()
        val centroids = Mat().tracked()
        val numLabels = Imgproc.connectedComponentsWithStats(thresh, labels, stats, centroids, 8)

        // Bounds of the central region in which a component's centroid must lie.
        val centralThreshold = 0.3
        val minComponentArea = 70
        val height = thresh.rows()
        val width = thresh.cols()
        val minX = centralThreshold * width
        val maxX = (1 - centralThreshold) * width
        val minY = centralThreshold * height
        val maxY = (1 - centralThreshold) * height

        var selectedComponent = -1
        var selectedArea = 0
        // Label 0 is skipped; the loop starts at 1 as in the component statistics table.
        for (label in 1 until numLabels) {
            val area = stats.get(label, Imgproc.CC_STAT_AREA)[0].toInt()
            val cx = centroids.get(label, 0)[0]
            val cy = centroids.get(label, 1)[0]
            val isCentral = cx > minX && cx < maxX && cy > minY && cy < maxY
            if (isCentral && area > minComponentArea && (selectedComponent == -1 || area > selectedArea)) {
                selectedComponent = label
                selectedArea = area
            }
        }

        val mask = Mat.zeros(img.size(), CvType.CV_8UC1).tracked()
        if (selectedComponent != -1) {
            Core.compare(labels, Scalar(selectedComponent.toDouble()), mask, Core.CMP_EQ)
        }

        // Not tracked: this Mat is handed to the caller.
        val resultImage = Mat.zeros(img.size(), img.type())
        img.copyTo(resultImage, mask)

        Core.bitwise_not(resultImage, resultImage)
        return resultImage
    } finally {
        temporaries.forEach { it.release() }
    }
}
}
}
62 changes: 46 additions & 16 deletions ios/RnExecutorch/models/ocr/utils/RecognizerUtils.mm
Original file line number Diff line number Diff line change
Expand Up @@ -261,40 +261,70 @@ + (double)computeConfidenceScore:(NSArray<NSNumber *> *)valuesArray
}

// Keeps only the largest, centrally located connected component of `img` and
// returns it as a bitwise-inverted image of the same size and type.
//
// The threshold polarity is chosen from the intensity histogram: when the upper
// half (bins 128..255) holds more pixels than the lower half, the image is
// binarized with THRESH_BINARY_INV, otherwise with THRESH_BINARY (both combined
// with Otsu's method). When no component qualifies, the mask stays empty and the
// inverted all-zero image is returned.
//
// NOTE(review): histogram range and Otsu thresholding suggest `img` is expected
// to be single-channel 8-bit; confirm against the caller.
+ (cv::Mat)cropSingleCharacter:(cv::Mat)img {
  cv::Mat histogram;

  int histSize = 256;
  float range[] = {0, 256};
  const float *histRange = {range};
  bool uniform = true, accumulate = false;

  cv::calcHist(&img, 1, 0, cv::Mat(), histogram, 1, &histSize, &histRange, uniform,
               accumulate);

  // Compare how many pixels fall into the lower and the upper half of the histogram.
  int midPoint = histSize / 2;

  double sumLeft = 0.0, sumRight = 0.0;
  for (int i = 0; i < midPoint; i++) {
    sumLeft += histogram.at<float>(i);
  }
  for (int i = midPoint; i < histSize; i++) {
    sumRight += histogram.at<float>(i);
  }

  int thresholdType;
  if (sumLeft < sumRight) {
    thresholdType = cv::THRESH_BINARY_INV;
  } else {
    thresholdType = cv::THRESH_BINARY;
  }

  // Single threshold pass using the polarity selected above; the threshold
  // value (0) is ignored because THRESH_OTSU computes it.
  cv::Mat thresh;
  cv::threshold(img, thresh, 0, 255, thresholdType + cv::THRESH_OTSU);

  cv::Mat labels, stats, centroids;
  const int numLabels =
      connectedComponentsWithStats(thresh, labels, stats, centroids, 8);
  const CGFloat centralThreshold = 0.3;
  const int height = thresh.rows;
  const int width = thresh.cols;

  // Bounds of the central region in which a component's centroid must lie.
  const int minX = centralThreshold * width;
  const int maxX = (1 - centralThreshold) * width;
  const int minY = centralThreshold * height;
  const int maxY = (1 - centralThreshold) * height;

  int selectedComponent = -1;

  // Label 0 is skipped; pick the largest component (area > 70) with a central centroid.
  for (int i = 1; i < numLabels; i++) {
    const int area = stats.at<int>(i, cv::CC_STAT_AREA);
    const double cx = centroids.at<double>(i, 0);
    const double cy = centroids.at<double>(i, 1);

    if (minX < cx && cx < maxX && minY < cy && cy < maxY && area > 70) {
      if (selectedComponent == -1 ||
          area > stats.at<int>(selectedComponent, cv::CC_STAT_AREA)) {
        selectedComponent = i;
      }
    }
  }

  cv::Mat mask = cv::Mat::zeros(img.size(), CV_8UC1);
  if (selectedComponent != -1) {
    // The comparison yields 0/255; dividing by 255 gives a 0/1 mask, which
    // copyTo treats as "copy where non-zero".
    mask = (labels == selectedComponent) / 255;
  }
  cv::Mat resultImage = cv::Mat::zeros(img.size(), img.type());
  img.copyTo(resultImage, mask);

  cv::bitwise_not(resultImage, resultImage);
  return resultImage;
}

@end
Loading