diff --git a/csrc/mmdeploy/operation/cuda/cvtcolor.cpp b/csrc/mmdeploy/operation/cuda/cvtcolor.cpp index eaff21b1a5..1e35adc06b 100644 --- a/csrc/mmdeploy/operation/cuda/cvtcolor.cpp +++ b/csrc/mmdeploy/operation/cuda/cvtcolor.cpp @@ -90,10 +90,11 @@ class CvtColorImpl : public CvtColor { auto height = src.height(); auto width = src.width(); - auto channels = src.channel(); - auto stride = width * channels; + auto src_channels = src.channel(); + auto src_stride = width * src_channels; Mat dst_mat(height, width, dst_fmt, src.type(), device()); + auto dst_stride = width * dst_mat.channel(); auto convert = [&](auto type) -> Result { using T = typename decltype(type)::type; @@ -101,8 +102,8 @@ class CvtColorImpl : public CvtColor { if (!converter) { return Status(eNotSupported); } - auto ret = - converter(cuda_stream, height, width, stride, src.data(), stride, dst_mat.data()); + auto ret = converter(cuda_stream, height, width, src_stride, src.data(), dst_stride, + dst_mat.data()); if (ret != ppl::common::RC_SUCCESS) { return Status(eFail); } diff --git a/csrc/mmdeploy/preprocess/transform/load.cpp b/csrc/mmdeploy/preprocess/transform/load.cpp index 5640d1c478..73a3951253 100644 --- a/csrc/mmdeploy/preprocess/transform/load.cpp +++ b/csrc/mmdeploy/preprocess/transform/load.cpp @@ -55,7 +55,7 @@ class PrepareImage : public Transform { if (color_type_ == "color" || color_type_ == "color_ignore_orientation") { OUTCOME_TRY(cvt_color_.Apply(src_mat, dst_mat, PixelFormat::kBGR)); } else { - OUTCOME_TRY(cvt_color_.Apply(dst_mat, dst_mat, PixelFormat::kGRAYSCALE)); + OUTCOME_TRY(cvt_color_.Apply(src_mat, dst_mat, PixelFormat::kGRAYSCALE)); } auto tensor = to_tensor(dst_mat); if (to_float32_) { diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt index 6255fcd0ab..98499a4124 100644 --- a/demo/csrc/CMakeLists.txt +++ b/demo/csrc/CMakeLists.txt @@ -41,6 +41,8 @@ if (MMDEPLOY_BUILD_SDK_CXX_API) add_example(restorer cpp restorer) add_example(text_detector cpp text_ocr) 
add_example(text_detector cpp text_det_recog)
+    add_example(text_detector cpp text_det)
+    add_example(text_recognizer cpp text_recog)
     add_example(pose_detector cpp pose_detector)
     add_example(rotated_detector cpp rotated_detector)
     add_example(pose_detector cpp pose_tracker)
diff --git a/demo/csrc/cpp/text_det.cxx b/demo/csrc/cpp/text_det.cxx
new file mode 100644
index 0000000000..ba575759cc
--- /dev/null
+++ b/demo/csrc/cpp/text_det.cxx
@@ -0,0 +1,56 @@
+// Text detection demo: runs mmdeploy::TextDetector on a single image and saves
+// a copy of the image with the detected quadrilaterals drawn on it.
+
+#include <cstdio>
+
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+
+#include "mmdeploy/common.hpp"
+#include "mmdeploy/text_detector.hpp"
+
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    fprintf(stderr, "usage:\n text_det device_name model_path image_path\n");
+    return 1;
+  }
+  auto device_name = argv[1];
+  auto model_path = argv[2];
+  auto image_path = argv[3];
+  cv::Mat img = cv::imread(image_path);
+  if (!img.data) {
+    fprintf(stderr, "failed to load image: %s\n", image_path);
+    return 1;
+  }
+
+  // NOTE(review): hard-coded path; presumably where profiling data is written.
+  mmdeploy::Profiler profiler{"/tmp/profile.bin"};
+  mmdeploy::Context context(mmdeploy::Device(device_name, 0));
+  context.Add(profiler);
+
+  mmdeploy::Model model(model_path);
+  mmdeploy::TextDetector detector(model, context);
+
+  // The same image is processed REPEAT times in total; only the last result is drawn.
+  const int REPEAT = 20;
+  auto res = detector.Apply(img);
+  for (int i = 0; i < REPEAT - 1; ++i) {
+    res = detector.Apply(img);
+  }
+
+  for (const auto& det : res) {
+    const auto& box = det.bbox;
+    // Connect consecutive corners (wrapping 3 -> 0). cv::line is used instead of
+    // cv::rectangle so that edges of non-axis-aligned boxes are drawn as edges.
+    for (int i = 0; i < 4; i++) {
+      const auto& from = box[i];
+      const auto& to = box[(i + 1) % 4];
+      cv::line(img, cv::Point{static_cast<int>(from.x), static_cast<int>(from.y)},
+               cv::Point{static_cast<int>(to.x), static_cast<int>(to.y)},
+               cv::Scalar{0, 255, 0});
+    }
+  }
+  cv::imwrite("output_ocr_detection.jpg", img);
+
+  return 0;
+}
diff --git a/demo/csrc/cpp/text_recog.cxx b/demo/csrc/cpp/text_recog.cxx
new file mode 100644
index 0000000000..d82fe6e927
--- /dev/null
+++ b/demo/csrc/cpp/text_recog.cxx
@@ -0,0 +1,57 @@
+// Text recognition demo: runs mmdeploy::TextRecognizer on a single image, using
+// one box that spans the whole image, and prints each character with its score.
+
+#include <cstdio>
+#include <vector>
+
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+
+#include "mmdeploy/common.hpp"
+#include "mmdeploy/text_detector.hpp"
+#include "mmdeploy/text_recognizer.hpp"
+
+int main(int argc, char* argv[]) {
+  if (argc != 4) {
+    fprintf(stderr, "usage:\n text_recog device_name model_path image_path\n");
+    return 1;
+  }
+  auto device_name = argv[1];
+  auto model_path = argv[2];
+  auto image_path = argv[3];
+  cv::Mat img = cv::imread(image_path);
+  if (!img.data) {
+    fprintf(stderr, "failed to load image: %s\n", image_path);
+    return 1;
+  }
+
+  // Corners of a box covering the full image; max_x/max_y are the last valid pixel coordinates.
+  const float max_x = static_cast<float>(img.cols) - 1;
+  const float max_y = static_cast<float>(img.rows) - 1;
+  mmdeploy::TextDetection bbox = {
+      {{0.f, max_y}, {0.f, 0.f}, {max_x, 0.f}, {max_x, max_y}}, 1.0f};
+  std::vector<mmdeploy::TextDetection> bboxes = {bbox};
+
+  // NOTE(review): hard-coded path; presumably where profiling data is written.
+  mmdeploy::Profiler profiler{"/tmp/profile.bin"};
+  mmdeploy::Context context(mmdeploy::Device(device_name, 0));
+  context.Add(profiler);
+
+  mmdeploy::Model model(model_path);
+  mmdeploy::TextRecognizer recognizer(model, context);
+
+  // Every run receives the same bboxes, so the printed result matches the first call.
+  const int REPEAT = 20;
+  auto res = recognizer.Apply(img, bboxes);
+  for (int i = 0; i < REPEAT - 1; ++i) {
+    res = recognizer.Apply(img, bboxes);
+  }
+
+  for (auto& reg : res) {
+    for (int i = 0; i < reg.length; i++) {
+      printf("%c %f\n", reg.text[i], reg.score[i]);
+    }
+  }
+
+  return 0;
+}