Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions data/kernels/atrous.cl
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,6 @@ eaw_synthesize(__write_only image2d_t out,
write_imagef (out, (int2)(x, y), sum);
}

__kernel void
eaw_zero(__write_only image2d_t out,
const int width,
const int height)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if(x >= width || y >= height) return;
write_imagef(out, (int2)(x, y), (float4)0.0f);
}

__kernel void
eaw_addbuffers(__write_only image2d_t out_out,
__read_only image2d_t out_in,
Expand Down
12 changes: 0 additions & 12 deletions data/kernels/basecurve.cl
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,6 @@ basecurve_lut(read_only image2d_t in, write_only image2d_t out, const int width,
write_imagef (out, (int2)(x, y), pixel);
}


kernel void
basecurve_zero(write_only image2d_t out, const int width, const int height)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

write_imagef (out, (int2)(x, y), (float4)0.0f);
}

/*
Original basecurve implementation. Applies a LUT on a per-channel basis which can cause color shifts.

Expand Down
12 changes: 0 additions & 12 deletions data/kernels/capture.cl
Original file line number Diff line number Diff line change
Expand Up @@ -167,18 +167,6 @@ __kernel void kernel_9x9_div(global float *in,
out[i] = luminance[i] / fmax(val, CAPTURE_YMIN);
}

__kernel void prefill_clip_mask(global float *mask,
const int width,
const int height)
{
const int col = get_global_id(0);
const int row = get_global_id(1);
if(col >= width || row >= height) return;

const int i = mad24(row, width, col);
mask[i] = 1.0f;
}

__kernel void prepare_blend(__read_only image2d_t cfa,
__read_only image2d_t dev_out,
const int filters,
Expand Down
14 changes: 0 additions & 14 deletions data/kernels/demosaic_markesteijn.cl
Original file line number Diff line number Diff line change
Expand Up @@ -869,20 +869,6 @@ markesteijn_homo_quench(global uchar *homosum1, global uchar *homosum2, const in
homosum2[glidx] = hmo2;
}

// Initialize output image to zero
kernel void
markesteijn_zero(write_only image2d_t out, const int width, const int height, const int border)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

// take sufficient border into account
if(x < border || x >= width-border || y < border || y >= height-border) return;

write_imagef(out, (int2)(x, y), (float4)0.0f);
}


// accumulate contributions of all directions into output image
kernel void
markesteijn_accu(read_only image2d_t in, write_only image2d_t out, global float *rgb,
Expand Down
14 changes: 0 additions & 14 deletions data/kernels/denoiseprofile.cl
Original file line number Diff line number Diff line change
Expand Up @@ -113,20 +113,6 @@ kernel void denoiseprofile_precondition_Y0U0V0(read_only image2d_t in,
write_imagef (out, (int2)(x, y), outpx);
}


kernel void
denoiseprofile_init(global float4* out, const int width, const int height)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
const int gidx = mad24(y, width, x);

if(x >= width || y >= height) return;

out[gidx] = (float4)0.0f;
}


kernel void
denoiseprofile_dist(read_only image2d_t in, global float* U4, const int width, const int height,
const int2 q)
Expand Down
13 changes: 0 additions & 13 deletions data/kernels/dwt.cl
Original file line number Diff line number Diff line change
Expand Up @@ -109,16 +109,3 @@ dwt_hat_transform_col(global float4 *lpass, int width, int height, const int sc,
* lpass_mult;
}
}

kernel void
dwt_init_buffer(global float4 *buffer, int width, int height)
{
const int x = get_global_id(0);
const int y = get_global_id(1);

if(x >= width || y >= height) return;

const int idx = mad24(y, width, x);

buffer[idx] = 0.f;
}
12 changes: 0 additions & 12 deletions data/kernels/nlmeans.cl
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,6 @@ static inline float ddirac(const int2 q)
return ((q.x || q.y) ? 1.0f : 0.0f);
}

kernel void nlmeans_init(global float4* out,
const int width,
const int height)
{
const int x = get_global_id(0);
const int y = get_global_id(1);
if(x >= width || y >= height) return;

out[mad24(y, width, x)] = (float4)0.0f;
}


kernel void nlmeans_dist(read_only image2d_t in,
global float *U4,
const int width,
Expand Down
4 changes: 4 additions & 0 deletions src/common/dlopencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,10 @@ dt_dlopencl_t *dt_dlopencl_init(const char *name)
(void (**)(void)) & ocl->symbols->dt_clGetMemObjectInfo);
success = success && dt_gmodule_symbol(module, "clGetImageInfo",
((void (**)(void)) & ocl->symbols->dt_clGetImageInfo));
success = success && dt_gmodule_symbol(module, "clEnqueueFillBuffer",
((void (**)(void)) & ocl->symbols->dt_clEnqueueFillBuffer));
success = success && dt_gmodule_symbol(module, "clEnqueueFillImage",
((void (**)(void)) & ocl->symbols->dt_clEnqueueFillImage));
}

ocl->have_opencl = success;
Expand Down
5 changes: 5 additions & 0 deletions src/common/dlopencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,9 @@ typedef cl_int (*dt_clEnqueueNativeKernel_t)(cl_command_queue, void (*user_func)
typedef cl_int (*dt_clEnqueueMarker_t)(cl_command_queue, cl_event *);
typedef cl_int (*dt_clEnqueueWaitForEvents_t)(cl_command_queue, cl_uint, const cl_event *);

typedef cl_int (*dt_clEnqueueFillBuffer_t)(cl_command_queue, cl_mem, const void *, size_t, size_t, size_t, cl_uint, const cl_event *, cl_event *);
typedef cl_int (*dt_clEnqueueFillImage_t)(cl_command_queue, cl_mem, const void *, const size_t *, const size_t *, cl_uint, const cl_event *, cl_event *);

typedef struct dt_dlopencl_symbols_t
{
dt_clGetPlatformIDs_t dt_clGetPlatformIDs;
Expand Down Expand Up @@ -206,6 +209,8 @@ typedef struct dt_dlopencl_symbols_t
dt_clEnqueueNativeKernel_t dt_clEnqueueNativeKernel;
dt_clEnqueueMarker_t dt_clEnqueueMarker;
dt_clEnqueueWaitForEvents_t dt_clEnqueueWaitForEvents;
dt_clEnqueueFillBuffer_t dt_clEnqueueFillBuffer;
dt_clEnqueueFillImage_t dt_clEnqueueFillImage;
} dt_dlopencl_symbols_t;


Expand Down
8 changes: 2 additions & 6 deletions src/common/dwt.c
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,6 @@ dt_dwt_cl_global_t *dt_dwt_init_cl_global()
g->kernel_dwt_subtract_layer = dt_opencl_create_kernel(program, "dwt_subtract_layer");
g->kernel_dwt_hat_transform_col = dt_opencl_create_kernel(program, "dwt_hat_transform_col");
g->kernel_dwt_hat_transform_row = dt_opencl_create_kernel(program, "dwt_hat_transform_row");
g->kernel_dwt_init_buffer = dt_opencl_create_kernel(program, "dwt_init_buffer");
return g;
}

Expand All @@ -545,7 +544,6 @@ void dt_dwt_free_cl_global(dt_dwt_cl_global_t *g)
dt_opencl_free_kernel(g->kernel_dwt_subtract_layer);
dt_opencl_free_kernel(g->kernel_dwt_hat_transform_col);
dt_opencl_free_kernel(g->kernel_dwt_hat_transform_row);
dt_opencl_free_kernel(g->kernel_dwt_init_buffer);

free(g);
}
Expand Down Expand Up @@ -655,8 +653,7 @@ static cl_int dwt_wavelet_decompose_cl(cl_mem img, dwt_params_cl_t *const p, _dw
if(layers == NULL) goto cleanup;

// init layer buffer
err = dt_opencl_enqueue_kernel_2d_args(devid, p->global->kernel_dwt_init_buffer, p->width, p->height,
CLARG(layers), CLARG(p->width), CLARG(p->height));
err = dt_opencl_fill_buffer(devid, layers, (size_t)p->width * p->height, p->ch, 0.0f);
if(err != CL_SUCCESS) goto cleanup;

if(p->merge_from_scale > 0)
Expand All @@ -665,8 +662,7 @@ static cl_int dwt_wavelet_decompose_cl(cl_mem img, dwt_params_cl_t *const p, _dw
if(merged_layers == NULL) goto cleanup;

// init reconstruct buffer
err = dt_opencl_enqueue_kernel_2d_args(devid, p->global->kernel_dwt_init_buffer, p->width, p->height,
CLARG(merged_layers), CLARG(p->width), CLARG(p->height));
err = dt_opencl_fill_buffer(devid, merged_layers, (size_t)p->width * p->height, p->ch, 0.0f);
if(err != CL_SUCCESS) goto cleanup;
}

Expand Down
1 change: 0 additions & 1 deletion src/common/dwt.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,6 @@ typedef struct dt_dwt_cl_global_t
int kernel_dwt_subtract_layer;
int kernel_dwt_hat_transform_col;
int kernel_dwt_hat_transform_row;
int kernel_dwt_init_buffer;
} dt_dwt_cl_global_t;

typedef struct dwt_params_cl_t
Expand Down
16 changes: 2 additions & 14 deletions src/common/nlmeans_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -583,18 +583,6 @@ static void get_blocksizes(
return;
}

// zero output pixels, as we will be accumulating them one patch at a time
static inline cl_int nlmeans_cl_init(
const int devid,
const int kernel,
cl_mem dev_out,
const int height,
const int width)
{
return dt_opencl_enqueue_kernel_2d_args(devid, kernel, width, height,
CLARG(dev_out), CLARG(width), CLARG(height));
}

// horizontal pass, add together columns of each patch
static inline cl_int nlmeans_cl_horiz(
const int devid,
Expand Down Expand Up @@ -663,7 +651,7 @@ int nlmeans_denoise_cl(
get_blocksizes(&hblocksize, &vblocksize, P, devid, params->kernel_horiz, params->kernel_vert);

// zero the output buffer into which we will be accumulating results
err = nlmeans_cl_init(devid,params->kernel_init,dev_out,height,width);
err = dt_opencl_fill_buffer(devid, dev_out, (size_t)width * height, 4, 0.0f);
if(err != CL_SUCCESS) goto error;

const size_t bwidth = ROUNDUP(width, hblocksize);
Expand Down Expand Up @@ -748,7 +736,7 @@ int nlmeans_denoiseprofile_cl(
get_blocksizes(&hblocksize, &vblocksize, P, devid, params->kernel_horiz, params->kernel_vert);

// zero the output buffer into which we will be accumulating results
err = nlmeans_cl_init(devid,params->kernel_init,dev_out,height,width);
err = dt_opencl_fill_buffer(devid, dev_out, (size_t)width * height, 4, 0.0f);
if(err != CL_SUCCESS) goto error;

const size_t bwidth = ROUNDUP(width, hblocksize);
Expand Down
1 change: 0 additions & 1 deletion src/common/nlmeans_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ struct dt_nlmeans_param_t
int decimate; // set to 1 to search only half the patches in the neighborhood (default = 0)
const float* const norm; // array of four per-channel weight factors
dt_dev_pixelpipe_type_t pipetype;
int kernel_init; // CL: initialization (runs once)
int kernel_dist; // CL: compute channel-normed squared pixel differences (runs for each patch)
int kernel_horiz; // CL: horizontal sum (runs for each patch)
int kernel_vert; // CL: vertical sum (runs for each patch)
Expand Down
52 changes: 52 additions & 0 deletions src/common/opencl.c
Original file line number Diff line number Diff line change
Expand Up @@ -3359,6 +3359,58 @@ int dt_opencl_unmap_mem_object(const int devid,
return err;
}

int dt_opencl_fill_buffer(const int devid,
cl_mem buffer,
const size_t pts,
const size_t ch,
const float val)
{
if(!_cldev_running(devid))
return DT_OPENCL_NODEVICE;
if(ch < 1 || ch > 4)
return DT_OPENCL_PROCESS_CL;

cl_event *eventp = _opencl_events_get_slot(devid, "[Fill Float Buffer]");
const float filler[4] = { val, val, val, val };
const size_t psize = sizeof(float) * ch;

const cl_int err = (darktable.opencl->dlocl->symbols->dt_clEnqueueFillBuffer)
(darktable.opencl->dev[devid].cmd_queue, buffer,
&filler, psize, 0, pts * psize, 0, NULL, eventp);

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_fill_float%i_buffer] could not fill buffer on device '%s' id=%d: %s",
(int)ch, darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

int dt_opencl_fill_image(const int devid,
cl_mem image,
const size_t *origin,
const size_t *region,
const float val)
{
if(!_cldev_running(devid))
return DT_OPENCL_NODEVICE;

cl_event *eventp = _opencl_events_get_slot(devid, "[Fill Image]");
const float filler[4] = { val, val, val, val };

const size_t org[3] = { origin ? origin[0] : 0, origin ? origin[1] : 0, 0 };
const size_t reg[3] = { region[0], region[1], 1 };

const cl_int err = (darktable.opencl->dlocl->symbols->dt_clEnqueueFillImage)
(darktable.opencl->dev[devid].cmd_queue, image, filler,
org, reg, 0, NULL, eventp);

if(err != CL_SUCCESS)
dt_print(DT_DEBUG_OPENCL,
"[dt_opencl_fill_image] could not fill image on device '%s' id=%d: %s",
darktable.opencl->dev[devid].fullname, devid, cl_errstr(err));
return err;
}

void *dt_opencl_alloc_device(const int devid,
const int width,
const int height,
Expand Down
10 changes: 10 additions & 0 deletions src/common/opencl.h
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,16 @@ int dt_opencl_write_host_to_image_raw(const int devid,
const int rowpitch,
const gboolean blocking);

int dt_opencl_fill_buffer(const int devid,
cl_mem buffer,
const size_t pts,
const size_t ch,
const float val);
int dt_opencl_fill_image(const int devid,
cl_mem image,
const size_t *orig,
const size_t *area,
const float val);
void *dt_opencl_copy_host_to_image(const int devid,
void *host,
const int width,
Expand Down
7 changes: 2 additions & 5 deletions src/iop/atrous.c
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,6 @@ typedef struct dt_iop_atrous_gui_data_t

typedef struct dt_iop_atrous_global_data_t
{
int kernel_zero;
int kernel_decompose;
int kernel_synthesize;
int kernel_addbuffers;
Expand Down Expand Up @@ -412,8 +411,8 @@ int process_cl(dt_iop_module_t *self,
if(!dev_detail || !dev_tmp || !dev_tmp2 || !dev_filter) goto error;

// clear dev_out to zeros, as we will be incrementally accumulating results there
err = dt_opencl_enqueue_kernel_2d_args(devid, gd->kernel_zero, width, height,
CLARG(dev_out), CLARG(width), CLARG(height));
const size_t area[2] = { width, height };
err = dt_opencl_fill_image(devid, dev_out, CLIMG_ORIGIN, area, 0.0f);
if(err != CL_SUCCESS) goto error;

// the buffers for the buffer ping-pong. We start with dev_in as
Expand Down Expand Up @@ -627,7 +626,6 @@ void init_global(dt_iop_module_so_t *self)
gd->kernel_decompose = dt_opencl_create_kernel(program, "eaw_decompose");
gd->kernel_synthesize = dt_opencl_create_kernel(program, "eaw_synthesize");
#ifdef USE_NEW_CL
gd->kernel_zero = dt_opencl_create_kernel(program, "eaw_zero");
gd->kernel_addbuffers = dt_opencl_create_kernel(program, "eaw_addbuffers");
#endif
}
Expand All @@ -638,7 +636,6 @@ void cleanup_global(dt_iop_module_so_t *self)
dt_opencl_free_kernel(gd->kernel_decompose);
dt_opencl_free_kernel(gd->kernel_synthesize);
#ifdef USE_NEW_CL
dt_opencl_free_kernel(gd->kernel_zero);
dt_opencl_free_kernel(gd->kernel_addbuffers);
#endif
free(self->data);
Expand Down
Loading
Loading