diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp
index af6b2bbd..d8bca738 100644
--- a/examples/cli/main.cpp
+++ b/examples/cli/main.cpp
@@ -6,9 +6,9 @@
 #include <string>
 #include <vector>
 
-// #include "preprocessing.hpp"
 #include "flux.hpp"
 #include "stable-diffusion.h"
+#include "preprocessing.hpp"
 
 #define STB_IMAGE_IMPLEMENTATION
 #define STB_IMAGE_STATIC
diff --git a/preprocessing.cpp b/preprocessing.cpp
new file mode 100644
index 00000000..d0a02df9
--- /dev/null
+++ b/preprocessing.cpp
@@ -0,0 +1,231 @@
+#include "preprocessing.hpp"
+#define M_PI_ 3.14159265358979323846
+
+// Convolve `input` with `kernel` (converted to f16) into `output` via a one-off single-threaded ggml graph.
+void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
+    struct ggml_init_params params;
+    params.mem_size = 20 * 1024 * 1024;  // ~20 MB scratch for the graph
+    params.mem_buffer = NULL;
+    params.no_alloc = false;
+    struct ggml_context* ctx0 = ggml_init(params);
+    struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
+    ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
+    ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
+    ggml_cgraph* gf = ggml_new_graph(ctx0);
+    ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output));
+    ggml_graph_compute_with_ctx(ctx0, gf, 1);
+    ggml_free(ctx0);
+}
+
+// Fill `kernel` (square, side = ne[0]) with a normalized Gaussian, sigma = 1.4.
+void gaussian_kernel(struct ggml_tensor* kernel) {
+    int ks_mid = kernel->ne[0] / 2;
+    float sigma = 1.4f;
+    float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
+    for (int y = 0; y < kernel->ne[0]; y++) {
+        float gx = -ks_mid + y;
+        for (int x = 0; x < kernel->ne[1]; x++) {
+            float gy = -ks_mid + x;
+            float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
+            ggml_tensor_set_f32(kernel, k_, x, y);
+        }
+    }
+}
+
+// RGB -> single-channel luma using BT.601 weights (0.2989 R + 0.5870 G + 0.1140 B).
+void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) {
+    for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
+        for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
+            float r = ggml_tensor_get_f32(rgb_img, ix, iy);
+            float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1);
+            float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2);
+            float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b;
+            ggml_tensor_set_f32(grayscale, gray, ix, iy);
+        }
+    }
+}
+
+// Element-wise gradient magnitude: h = sqrt(x^2 + y^2).
+void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
+    int n_elements = ggml_nelements(h);
+    float* dx = (float*)x->data;
+    float* dy = (float*)y->data;
+    float* dh = (float*)h->data;
+    for (int i = 0; i < n_elements; i++) {
+        dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
+    }
+}
+
+// Element-wise gradient direction: h = atan2(y, x), in radians.
+void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
+    int n_elements = ggml_nelements(h);
+    float* dx = (float*)x->data;
+    float* dy = (float*)y->data;
+    float* dh = (float*)h->data;
+    for (int i = 0; i < n_elements; i++) {
+        dh[i] = atan2f(dy[i], dx[i]);
+    }
+}
+
+// Scale tensor in place so its maximum becomes 1 (assumes a positive max, e.g. gradient magnitudes).
+void normalize_tensor(struct ggml_tensor* g) {
+    int n_elements = ggml_nelements(g);
+    float* dg = (float*)g->data;
+    float max = -INFINITY;
+    for (int i = 0; i < n_elements; i++) {
+        max = dg[i] > max ? dg[i] : max;
+    }
+    max = 1.0f / max;
+    for (int i = 0; i < n_elements; i++) {
+        dg[i] *= max;
+    }
+}
+
+// Canny non-maximum suppression: keep a pixel of G only if it is >= both neighbors along its gradient direction D.
+void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) {
+    for (int iy = 1; iy < result->ne[1] - 1; iy++) {
+        for (int ix = 1; ix < result->ne[0] - 1; ix++) {
+            float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
+            angle = angle < 0.0f ? angle + 180.0f : angle;  // fold into [0, 180)
+            float q = 1.0f;
+            float r = 1.0f;
+
+            // angle 0
+            if ((0 <= angle && angle < 22.5f) || (157.5f <= angle && angle <= 180)) {
+                q = ggml_tensor_get_f32(G, ix, iy + 1);
+                r = ggml_tensor_get_f32(G, ix, iy - 1);
+            }
+            // angle 45
+            else if (22.5f <= angle && angle < 67.5f) {
+                q = ggml_tensor_get_f32(G, ix + 1, iy - 1);
+                r = ggml_tensor_get_f32(G, ix - 1, iy + 1);
+            }
+            // angle 90
+            else if (67.5f <= angle && angle < 112.5f) {
+                q = ggml_tensor_get_f32(G, ix + 1, iy);
+                r = ggml_tensor_get_f32(G, ix - 1, iy);
+            }
+            // angle 135
+            else if (112.5f <= angle && angle < 157.5f) {
+                q = ggml_tensor_get_f32(G, ix - 1, iy - 1);
+                r = ggml_tensor_get_f32(G, ix + 1, iy + 1);
+            }
+
+            float cur = ggml_tensor_get_f32(G, ix, iy);
+            if ((cur >= q) && (cur >= r)) {
+                ggml_tensor_set_f32(result, cur, ix, iy);
+            } else {
+                ggml_tensor_set_f32(result, 0.0f, ix, iy);
+            }
+        }
+    }
+}
+
+// Double threshold + hysteresis, in place: map pixels to strong/weak/0, zero a 3-px border, then promote weak pixels touching a strong one.
+void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
+    int n_elements = ggml_nelements(img);
+    float* imd = (float*)img->data;
+    float max = -INFINITY;
+    for (int i = 0; i < n_elements; i++) {
+        max = imd[i] > max ? imd[i] : max;
+    }
+    float ht = max * high_threshold;
+    float lt = ht * low_threshold;
+    for (int i = 0; i < n_elements; i++) {
+        float img_v = imd[i];
+        if (img_v >= ht) {  // strong pixel
+            imd[i] = strong;
+        } else if (img_v <= ht && img_v >= lt) {  // weak pixel
+            imd[i] = weak;
+        }
+    }
+
+    for (int iy = 0; iy < img->ne[1]; iy++) {
+        for (int ix = 0; ix < img->ne[0]; ix++) {
+            if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) {
+                ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy);  // interior: keep as-is
+            } else {
+                ggml_tensor_set_f32(img, 0.0f, ix, iy);  // zero the 3-px border
+            }
+        }
+    }
+
+    // hysteresis
+    for (int iy = 1; iy < img->ne[1] - 1; iy++) {
+        for (int ix = 1; ix < img->ne[0] - 1; ix++) {
+            float imd_v = ggml_tensor_get_f32(img, ix, iy);
+            if (imd_v == weak) {
+                if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
+                    ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong ||
+                    ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) {
+                    ggml_tensor_set_f32(img, strong, ix, iy);
+                } else {
+                    ggml_tensor_set_f32(img, 0.0f, ix, iy);
+                }
+            }
+        }
+    }
+}
+
+// Full Canny pipeline on an RGB byte image; takes ownership of `img` (frees it) and returns a newly allocated RGB edge map.
+uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
+    struct ggml_init_params params;
+    params.mem_size = static_cast<size_t>(10 * 1024 * 1024);  // 10 MB
+    params.mem_buffer = NULL;
+    params.no_alloc = false;
+    struct ggml_context* work_ctx = ggml_init(params);
+
+    if (!work_ctx) {
+        LOG_ERROR("ggml_init() failed");
+        return NULL;
+    }
+
+    float kX[9] = {
+        -1, 0, 1,
+        -2, 0, 2,
+        -1, 0, 1};
+
+    float kY[9] = {
+        1, 2, 1,
+        0, 0, 0,
+        -1, -2, -1};
+
+    // generate kernel
+    int kernel_size = 5;
+    struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
+    struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
+    memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
+    struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
+    memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
+    gaussian_kernel(gkernel);
+    struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
+    struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
+    struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
+    struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
+    struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
+    struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray);
+    sd_image_to_tensor(img, image);
+    grayscale(image, image_gray);
+    convolve(image_gray, image_gray, gkernel, 2);
+    convolve(image_gray, iX, sf_kx, 1);
+    convolve(image_gray, iY, sf_ky, 1);
+    prop_hypot(iX, iY, G);
+    normalize_tensor(G);
+    prop_arctan2(iX, iY, tetha);
+    non_max_supression(image_gray, G, tetha);
+    threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
+    // to RGB channels
+    for (int iy = 0; iy < height; iy++) {
+        for (int ix = 0; ix < width; ix++) {
+            float gray = ggml_tensor_get_f32(image_gray, ix, iy);
+            gray = inverse ? 1.0f - gray : gray;
+            ggml_tensor_set_f32(image, gray, ix, iy);
+            ggml_tensor_set_f32(image, gray, ix, iy, 1);
+            ggml_tensor_set_f32(image, gray, ix, iy, 2);
+        }
+    }
+    free(img);  // ownership of the input buffer is taken here
+    uint8_t* output = sd_tensor_to_image(image);
+    ggml_free(work_ctx);
+    return output;
+}
diff --git a/preprocessing.hpp b/preprocessing.hpp
index 4ea1dbab..ae0a7aba 100644
--- a/preprocessing.hpp
+++ b/preprocessing.hpp
@@ -2,226 +2,23 @@
 #define __PREPROCESSING_HPP__
 
 #include "ggml_extend.hpp"
-#define M_PI_ 3.14159265358979323846
 
-void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
-    struct ggml_init_params params;
-    params.mem_size = 20 * 1024 * 1024;  // 10
-    params.mem_buffer = NULL;
-    params.no_alloc = false;
-    struct ggml_context* ctx0 = ggml_init(params);
-    struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
-    ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
-    ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
-    ggml_cgraph* gf = ggml_new_graph(ctx0);
-    ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output));
-    ggml_graph_compute_with_ctx(ctx0, gf, 1);
-    ggml_free(ctx0);
-}
+void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding);
 
-void gaussian_kernel(struct ggml_tensor* kernel) {
-    int ks_mid = kernel->ne[0] / 2;
-    float sigma = 1.4f;
-    float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
-    for (int y = 0; y < kernel->ne[0]; y++) {
-        float gx = -ks_mid + y;
-        for (int x = 0; x < kernel->ne[1]; x++) {
-            float gy = -ks_mid + x;
-            float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
-            ggml_tensor_set_f32(kernel, k_, x, y);
-        }
-    }
-}
+void gaussian_kernel(struct ggml_tensor* kernel);
 
-void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) {
-    for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
-        for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
-            float r = ggml_tensor_get_f32(rgb_img, ix, iy);
-            float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1);
-            float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2);
-            float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b;
-            ggml_tensor_set_f32(grayscale, gray, ix, iy);
-        }
-    }
-}
+void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale);
 
-void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
-    int n_elements = ggml_nelements(h);
-    float* dx = (float*)x->data;
-    float* dy = (float*)y->data;
-    float* dh = (float*)h->data;
-    for (int i = 0; i < n_elements; i++) {
-        dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
-    }
-}
+void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h);
 
-void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
-    int n_elements = ggml_nelements(h);
-    float* dx = (float*)x->data;
-    float* dy = (float*)y->data;
-    float* dh = (float*)h->data;
-    for (int i = 0; i < n_elements; i++) {
-        dh[i] = atan2f(dy[i], dx[i]);
-    }
-}
+void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h);
 
-void normalize_tensor(struct ggml_tensor* g) {
-    int n_elements = ggml_nelements(g);
-    float* dg = (float*)g->data;
-    float max = -INFINITY;
-    for (int i = 0; i < n_elements; i++) {
-        max = dg[i] > max ? dg[i] : max;
-    }
-    max = 1.0f / max;
-    for (int i = 0; i < n_elements; i++) {
-        dg[i] *= max;
-    }
-}
+void normalize_tensor(struct ggml_tensor* g);
 
-void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) {
-    for (int iy = 1; iy < result->ne[1] - 1; iy++) {
-        for (int ix = 1; ix < result->ne[0] - 1; ix++) {
-            float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
-            angle = angle < 0.0f ? angle += 180.0f : angle;
-            float q = 1.0f;
-            float r = 1.0f;
-
-            // angle 0
-            if ((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180)) {
-                q = ggml_tensor_get_f32(G, ix, iy + 1);
-                r = ggml_tensor_get_f32(G, ix, iy - 1);
-            }
-            // angle 45
-            else if (22.5f >= angle && angle < 67.5f) {
-                q = ggml_tensor_get_f32(G, ix + 1, iy - 1);
-                r = ggml_tensor_get_f32(G, ix - 1, iy + 1);
-            }
-            // angle 90
-            else if (67.5f >= angle && angle < 112.5) {
-                q = ggml_tensor_get_f32(G, ix + 1, iy);
-                r = ggml_tensor_get_f32(G, ix - 1, iy);
-            }
-            // angle 135
-            else if (112.5 >= angle && angle < 157.5f) {
-                q = ggml_tensor_get_f32(G, ix - 1, iy - 1);
-                r = ggml_tensor_get_f32(G, ix + 1, iy + 1);
-            }
-
-            float cur = ggml_tensor_get_f32(G, ix, iy);
-            if ((cur >= q) && (cur >= r)) {
-                ggml_tensor_set_f32(result, cur, ix, iy);
-            } else {
-                ggml_tensor_set_f32(result, 0.0f, ix, iy);
-            }
-        }
-    }
-}
+void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D);
 
-void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
-    int n_elements = ggml_nelements(img);
-    float* imd = (float*)img->data;
-    float max = -INFINITY;
-    for (int i = 0; i < n_elements; i++) {
-        max = imd[i] > max ? imd[i] : max;
-    }
-    float ht = max * high_threshold;
-    float lt = ht * low_threshold;
-    for (int i = 0; i < n_elements; i++) {
-        float img_v = imd[i];
-        if (img_v >= ht) {  // strong pixel
-            imd[i] = strong;
-        } else if (img_v <= ht && img_v >= lt) {  // strong pixel
-            imd[i] = weak;
-        }
-    }
-
-    for (int iy = 0; iy < img->ne[1]; iy++) {
-        for (int ix = 0; ix < img->ne[0]; ix++) {
-            if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) {
-                ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy);
-            } else {
-                ggml_tensor_set_f32(img, 0.0f, ix, iy);
-            }
-        }
-    }
-
-    // hysteresis
-    for (int iy = 1; iy < img->ne[1] - 1; iy++) {
-        for (int ix = 1; ix < img->ne[0] - 1; ix++) {
-            float imd_v = ggml_tensor_get_f32(img, ix, iy);
-            if (imd_v == weak) {
-                if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
-                    ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong ||
-                    ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) {
-                    ggml_tensor_set_f32(img, strong, ix, iy);
-                } else {
-                    ggml_tensor_set_f32(img, 0.0f, ix, iy);
-                }
-            }
-        }
-    }
-}
+void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong);
 
-uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
-    struct ggml_init_params params;
-    params.mem_size = static_cast<size_t>(10 * 1024 * 1024);  // 10
-    params.mem_buffer = NULL;
-    params.no_alloc = false;
-    struct ggml_context* work_ctx = ggml_init(params);
-
-    if (!work_ctx) {
-        LOG_ERROR("ggml_init() failed");
-        return NULL;
-    }
-
-    float kX[9] = {
-        -1, 0, 1,
-        -2, 0, 2,
-        -1, 0, 1};
-
-    float kY[9] = {
-        1, 2, 1,
-        0, 0, 0,
-        -1, -2, -1};
-
-    // generate kernel
-    int kernel_size = 5;
-    struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
-    struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
-    memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
-    struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
-    memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
-    gaussian_kernel(gkernel);
-    struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
-    struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
-    struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
-    struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
-    struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
-    struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray);
-    sd_image_to_tensor(img, image);
-    grayscale(image, image_gray);
-    convolve(image_gray, image_gray, gkernel, 2);
-    convolve(image_gray, iX, sf_kx, 1);
-    convolve(image_gray, iY, sf_ky, 1);
-    prop_hypot(iX, iY, G);
-    normalize_tensor(G);
-    prop_arctan2(iX, iY, tetha);
-    non_max_supression(image_gray, G, tetha);
-    threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
-    // to RGB channels
-    for (int iy = 0; iy < height; iy++) {
-        for (int ix = 0; ix < width; ix++) {
-            float gray = ggml_tensor_get_f32(image_gray, ix, iy);
-            gray = inverse ? 1.0f - gray : gray;
-            ggml_tensor_set_f32(image, gray, ix, iy);
-            ggml_tensor_set_f32(image, gray, ix, iy, 1);
-            ggml_tensor_set_f32(image, gray, ix, iy, 2);
-        }
-    }
-    free(img);
-    uint8_t* output = sd_tensor_to_image(image);
-    ggml_free(work_ctx);
-    return output;
-}
+uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse);
 
-#endif // __PREPROCESSING_HPP__
\ No newline at end of file
+#endif  // __PREPROCESSING_HPP__