Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: move processing implementation to a separate unit #632

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
#include <string>
#include <vector>

// #include "preprocessing.hpp"
#include "flux.hpp"
#include "stable-diffusion.h"
#include "preprocessing.hpp"

#define STB_IMAGE_IMPLEMENTATION
#define STB_IMAGE_STATIC
Expand Down
222 changes: 222 additions & 0 deletions preprocessing.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
#include "preprocessing.hpp"
#define M_PI_ 3.14159265358979323846

void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) {
struct ggml_init_params params;
params.mem_size = 20 * 1024 * 1024; // 10
params.mem_buffer = NULL;
params.no_alloc = false;
struct ggml_context* ctx0 = ggml_init(params);
struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1);
ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel));
ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1);
ggml_cgraph* gf = ggml_new_graph(ctx0);
ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output));
ggml_graph_compute_with_ctx(ctx0, gf, 1);
ggml_free(ctx0);
}

void gaussian_kernel(struct ggml_tensor* kernel) {
int ks_mid = kernel->ne[0] / 2;
float sigma = 1.4f;
float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f));
for (int y = 0; y < kernel->ne[0]; y++) {
float gx = -ks_mid + y;
for (int x = 0; x < kernel->ne[1]; x++) {
float gy = -ks_mid + x;
float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal;
ggml_tensor_set_f32(kernel, k_, x, y);
}
}
}

void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) {
for (int iy = 0; iy < rgb_img->ne[1]; iy++) {
for (int ix = 0; ix < rgb_img->ne[0]; ix++) {
float r = ggml_tensor_get_f32(rgb_img, ix, iy);
float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1);
float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2);
float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b;
ggml_tensor_set_f32(grayscale, gray, ix, iy);
}
}
}

void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
int n_elements = ggml_nelements(h);
float* dx = (float*)x->data;
float* dy = (float*)y->data;
float* dh = (float*)h->data;
for (int i = 0; i < n_elements; i++) {
dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]);
}
}

void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) {
int n_elements = ggml_nelements(h);
float* dx = (float*)x->data;
float* dy = (float*)y->data;
float* dh = (float*)h->data;
for (int i = 0; i < n_elements; i++) {
dh[i] = atan2f(dy[i], dx[i]);
}
}

void normalize_tensor(struct ggml_tensor* g) {
int n_elements = ggml_nelements(g);
float* dg = (float*)g->data;
float max = -INFINITY;
for (int i = 0; i < n_elements; i++) {
max = dg[i] > max ? dg[i] : max;
}
max = 1.0f / max;
for (int i = 0; i < n_elements; i++) {
dg[i] *= max;
}
}

void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struct ggml_tensor* D) {
for (int iy = 1; iy < result->ne[1] - 1; iy++) {
for (int ix = 1; ix < result->ne[0] - 1; ix++) {
float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_;
angle = angle < 0.0f ? angle += 180.0f : angle;
float q = 1.0f;
float r = 1.0f;

// angle 0
if ((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180)) {
q = ggml_tensor_get_f32(G, ix, iy + 1);
r = ggml_tensor_get_f32(G, ix, iy - 1);
}
// angle 45
else if (22.5f >= angle && angle < 67.5f) {
q = ggml_tensor_get_f32(G, ix + 1, iy - 1);
r = ggml_tensor_get_f32(G, ix - 1, iy + 1);
}
// angle 90
else if (67.5f >= angle && angle < 112.5) {
q = ggml_tensor_get_f32(G, ix + 1, iy);
r = ggml_tensor_get_f32(G, ix - 1, iy);
}
// angle 135
else if (112.5 >= angle && angle < 157.5f) {
q = ggml_tensor_get_f32(G, ix - 1, iy - 1);
r = ggml_tensor_get_f32(G, ix + 1, iy + 1);
}

float cur = ggml_tensor_get_f32(G, ix, iy);
if ((cur >= q) && (cur >= r)) {
ggml_tensor_set_f32(result, cur, ix, iy);
} else {
ggml_tensor_set_f32(result, 0.0f, ix, iy);
}
}
}
}

void threshold_hystersis(struct ggml_tensor* img, float high_threshold, float low_threshold, float weak, float strong) {
int n_elements = ggml_nelements(img);
float* imd = (float*)img->data;
float max = -INFINITY;
for (int i = 0; i < n_elements; i++) {
max = imd[i] > max ? imd[i] : max;
}
float ht = max * high_threshold;
float lt = ht * low_threshold;
for (int i = 0; i < n_elements; i++) {
float img_v = imd[i];
if (img_v >= ht) { // strong pixel
imd[i] = strong;
} else if (img_v <= ht && img_v >= lt) { // strong pixel
imd[i] = weak;
}
}

for (int iy = 0; iy < img->ne[1]; iy++) {
for (int ix = 0; ix < img->ne[0]; ix++) {
if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) {
ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy);
} else {
ggml_tensor_set_f32(img, 0.0f, ix, iy);
}
}
}

// hysteresis
for (int iy = 1; iy < img->ne[1] - 1; iy++) {
for (int ix = 1; ix < img->ne[0] - 1; ix++) {
float imd_v = ggml_tensor_get_f32(img, ix, iy);
if (imd_v == weak) {
if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong ||
ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong ||
ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) {
ggml_tensor_set_f32(img, strong, ix, iy);
} else {
ggml_tensor_set_f32(img, 0.0f, ix, iy);
}
}
}
}
}

uint8_t* preprocess_canny(uint8_t* img, int width, int height, float high_threshold, float low_threshold, float weak, float strong, bool inverse) {
struct ggml_init_params params;
params.mem_size = static_cast<size_t>(10 * 1024 * 1024); // 10
params.mem_buffer = NULL;
params.no_alloc = false;
struct ggml_context* work_ctx = ggml_init(params);

if (!work_ctx) {
LOG_ERROR("ggml_init() failed");
return NULL;
}

float kX[9] = {
-1, 0, 1,
-2, 0, 2,
-1, 0, 1};

float kY[9] = {
1, 2, 1,
0, 0, 0,
-1, -2, -1};

// generate kernel
int kernel_size = 5;
struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1);
struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx));
struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1);
memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky));
gaussian_kernel(gkernel);
struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1);
struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1);
struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray);
struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray);
struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray);
struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray);
sd_image_to_tensor(img, image);
grayscale(image, image_gray);
convolve(image_gray, image_gray, gkernel, 2);
convolve(image_gray, iX, sf_kx, 1);
convolve(image_gray, iY, sf_ky, 1);
prop_hypot(iX, iY, G);
normalize_tensor(G);
prop_arctan2(iX, iY, tetha);
non_max_supression(image_gray, G, tetha);
threshold_hystersis(image_gray, high_threshold, low_threshold, weak, strong);
// to RGB channels
for (int iy = 0; iy < height; iy++) {
for (int ix = 0; ix < width; ix++) {
float gray = ggml_tensor_get_f32(image_gray, ix, iy);
gray = inverse ? 1.0f - gray : gray;
ggml_tensor_set_f32(image, gray, ix, iy);
ggml_tensor_set_f32(image, gray, ix, iy, 1);
ggml_tensor_set_f32(image, gray, ix, iy, 2);
}
}
free(img);
uint8_t* output = sd_tensor_to_image(image);
ggml_free(work_ctx);
return output;
}
Loading
Loading