diff --git a/src/common/guided_filter.c b/src/common/guided_filter.c
index fbb95c1b5758..855ec7512bc5 100644
--- a/src/common/guided_filter.c
+++ b/src/common/guided_filter.c
@@ -484,10 +484,8 @@ static int _guided_filter_cl_impl(int devid,
   const gboolean tiling = num_tiles > 1;
 
   // When should we avoid internal tiling and thus use CPU fallback code? 
-  // Lets use advantage hint if provided or assume OpenCL is 10 times faster
-  const float hint = darktable.opencl->dev[devid].advantage;
-  const float advantage = hint > 1.0f ? 1.0f / hint : 0.1f;
-  const gboolean possible = ((float)valid_rows / (float)tile_height) > advantage;
+  // Let's assume OpenCL is 10 times faster, so require more than 10% valid rows per tile
+  const gboolean possible = ((float)valid_rows / (float)tile_height) > 0.1f;
 
   if(tiling || (darktable.unmuted & DT_DEBUG_VERBOSE))
     dt_print(DT_DEBUG_PIPE | DT_DEBUG_TILING,
diff --git a/src/common/opencl.c b/src/common/opencl.c
index 713ae0bc687c..ec53967bcda4 100644
--- a/src/common/opencl.c
+++ b/src/common/opencl.c
@@ -359,8 +359,8 @@ static void _opencl_write_device_config(const int devid)
 
   gchar key[256] = { 0 };
   gchar dat[512] = { 0 };
-  g_snprintf(key, 254, "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
-  g_snprintf(dat, 510, "%i %i %i %i %i %.3f %.3f",
+  g_snprintf(key, sizeof(key), "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
+  g_snprintf(dat, sizeof(dat), "%i %i %i %i %i %.3f %.3f",
     cl->dev[devid].micro_nap,
     cl->dev[devid].pinned_memory,
 
@@ -368,17 +368,24 @@ static void _opencl_write_device_config(const int devid)
     cl->dev[devid].use_events ? 1 : 0,
     cl->dev[devid].asyncmode,
     cl->dev[devid].disabled,
-    cl->dev[devid].advantage,
+    0.0f,
     cl->dev[devid].unified_fraction);
   dt_print_nts(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE,
-           "\n[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
+           "[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
+  dt_conf_set_string(key, dat);
+
+  // write per device list of modules that should not use OpenCL
+  g_snprintf(key, sizeof(key), "%s%s_nocl", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
+  g_snprintf(dat, sizeof(dat), "%s", cl->dev[devid].avoid ? cl->dev[devid].avoid : "");
+  dt_print_nts(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE,
+           "[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
   dt_conf_set_string(key, dat);
 
   // Also take care of extended device data, these are not only device
   // specific but also depend on the devid to support systems with two
   // similar cards.
-  g_snprintf(key, 254, "%s%s_id%i", DT_CLDEVICE_HEAD, cl->dev[devid].cname, devid);
-  g_snprintf(dat, 510, "%i", cl->dev[devid].headroom);
+  g_snprintf(key, sizeof(key), "%s%s_id%i", DT_CLDEVICE_HEAD, cl->dev[devid].cname, devid);
+  g_snprintf(dat, sizeof(dat), "%i", cl->dev[devid].headroom);
   dt_print_nts(DT_DEBUG_OPENCL | DT_DEBUG_VERBOSE,
            "[opencl_write_device_config] writing data '%s' for '%s'\n", dat, key);
   dt_conf_set_string(key, dat);
@@ -413,7 +420,7 @@ static gboolean _opencl_read_device_config(const int devid)
   dt_opencl_t *cl = darktable.opencl;
   dt_opencl_device_t *cldid = &cl->dev[devid];
   gchar key[256] = { 0 };
-  g_snprintf(key, 254, "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
+  g_snprintf(key, sizeof(key), "%s%s", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
 
   const gboolean existing_device = dt_conf_key_not_empty(key);
   gboolean safety_ok = TRUE;
@@ -425,17 +432,16 @@ static gboolean _opencl_read_device_config(const int devid)
     int events;
     int asyncmode;
     int disabled;
-    float advantage;
+    float dummy;
     float unified_fraction;
     sscanf(dat, "%i %i %i %i %i %f %f",
-           &micro_nap, &pinned_memory, &events, &asyncmode, &disabled, &advantage, &unified_fraction);
+           &micro_nap, &pinned_memory, &events, &asyncmode, &disabled, &dummy, &unified_fraction);
 
     cldid->use_events = events ? TRUE : FALSE;
     cldid->micro_nap = micro_nap;
     cldid->pinned_memory = pinned_memory ? TRUE : FALSE;
     cldid->asyncmode = asyncmode ? TRUE : FALSE;
     cldid->disabled = disabled ? TRUE : FALSE;
-    cldid->advantage = advantage;
     cldid->unified_fraction = unified_fraction;
   }
 
@@ -444,12 +450,14 @@ static gboolean _opencl_read_device_config(const int devid)
     cldid->unified_fraction = 0.25f;
   if((cldid->micro_nap < 0) || (cldid->micro_nap > 1000000))
     cldid->micro_nap = 250;
-  if((cldid->advantage < 0.0f) || (cldid->advantage > 10000.0f))
-    cldid->advantage = 0.0f;
+
+  // Also read the per-device list of modules to be avoided for OpenCL
+  g_snprintf(key, sizeof(key), "%s%s_nocl", DT_CLDEVICE_HEAD, cl->dev[devid].cname);
+  cldid->avoid = dt_conf_key_not_empty(key) ? dt_conf_get_string(key) : NULL;
 
   // Also take care of extended device data, these are not only device
   // specific but also depend on the devid
-  g_snprintf(key, 254, "%s%s_id%i", DT_CLDEVICE_HEAD, cldid->cname, devid);
+  g_snprintf(key, sizeof(key), "%s%s_id%i", DT_CLDEVICE_HEAD, cldid->cname, devid);
   if(dt_conf_key_not_empty(key))
   {
     const gchar *dat = dt_conf_get_string_const(key);
@@ -516,6 +524,7 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
   cl->dev[dev].cname = NULL;
   cl->dev[dev].options = NULL;
   cl->dev[dev].cflags = NULL;
+  cl->dev[dev].avoid = NULL;
   cl->dev[dev].memory_in_use = 0;
   cl->dev[dev].peak_memory = 0;
   cl->dev[dev].used_available = 0;
@@ -528,7 +537,6 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
   cl->dev[dev].clmem_error = FALSE;
   cl->dev[dev].clroundup_wd = 16;
   cl->dev[dev].clroundup_ht = 16;
-  cl->dev[dev].advantage = 0.0f;
   cl->dev[dev].use_events = TRUE;
   cl->dev[dev].asyncmode = FALSE;
   cl->dev[dev].disabled = FALSE;
@@ -785,13 +793,16 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
   dt_print_nts(DT_DEBUG_OPENCL, "   DEVICE VERSION:           %s API=%s\n",
     cl->dev[dev].device_version,
     cl->api30 ? "300" : "120");
-  dt_print_nts(DT_DEBUG_OPENCL, "   DEVICE_TYPE:              %s%s%s%s%s\n",
+  dt_print_nts(DT_DEBUG_OPENCL, "   DEVICE_TYPE:              %s%s%s%s%s",
       ((type & CL_DEVICE_TYPE_CPU) == CL_DEVICE_TYPE_CPU) ? "CPU" : "",
       ((type & CL_DEVICE_TYPE_GPU) == CL_DEVICE_TYPE_GPU) ? "GPU" : "",
       ((type & CL_DEVICE_TYPE_CUSTOM) == CL_DEVICE_TYPE_CUSTOM) ? "CUSTOM" : "",
       (type & CL_DEVICE_TYPE_ACCELERATOR)                 ? ", Accelerator" : "",
       unified_memory ? ", unified mem" : ", dedicated mem" );
 
+  if(unified_memory) dt_print_nts(DT_DEBUG_OPENCL, " (%i%%)\n", (int)(100.f * cl->dev[dev].unified_fraction));
+  else dt_print_nts(DT_DEBUG_OPENCL, "\n");
+
   if(is_custom_device && newdevice)
   {
     dt_print_nts(DT_DEBUG_OPENCL,
@@ -853,8 +864,8 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
   if(cl->dev[dev].max_global_mem < (uint64_t)800ul * DT_MEGA)
   {
     dt_print_nts(DT_DEBUG_OPENCL,
-                 "   *** insufficient global memory (%" PRIu64 "MB) ***\n",
-                 cl->dev[dev].max_global_mem / DT_MEGA);
+                 "   *** insufficient global memory (%zu MB) ***\n",
+                 (size_t)(cl->dev[dev].max_global_mem / DT_MEGA));
     res = TRUE;
     cl->dev[dev].disabled |= TRUE;
     goto end;
@@ -875,18 +886,15 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
   }
 
   dt_print_nts(DT_DEBUG_OPENCL,
-               "   GLOBAL MEM SIZE:          %.0f MB\n",
-               (double)cl->dev[dev].max_global_mem / (double)DT_MEGA);
+               "   GLOBAL MEM SIZE:          %zu MB\n", (size_t)(cl->dev[dev].max_global_mem / DT_MEGA));
   dt_print_nts(DT_DEBUG_OPENCL,
-               "   MAX IMAGE ALLOC:          %.0f MB\n",
-               (double)cl->dev[dev].max_mem_alloc / (double)DT_MEGA);
+               "   MAX IMAGE ALLOC:          %zu MB\n", (size_t)(cl->dev[dev].max_mem_alloc / DT_MEGA));
   dt_print_nts(DT_DEBUG_OPENCL,
-               "   MAX IMAGE SIZE:           %zd x %zd\n",
-               cl->dev[dev].max_image_width, cl->dev[dev].max_image_height);
+               "   MAX IMAGE SIZE:           %zu x %zu\n", cl->dev[dev].max_image_width, cl->dev[dev].max_image_height);
   dt_print_nts(DT_DEBUG_OPENCL,
-               "   MAX CONSTANT BUFFER:      %.0f KB\n", (double)cl->dev[dev].max_mem_constant / 1024.0);
+               "   MAX CONSTANT BUFFER:      %zu KB\n", (size_t)(cl->dev[dev].max_mem_constant / 1024));
   dt_print_nts(DT_DEBUG_OPENCL,
-               "   LOCAL MEM SIZE:           %zu KB\n", cl->dev[dev].local_size / 1024lu);
+               "   LOCAL MEM SIZE:           %zu KB\n", (size_t)(cl->dev[dev].local_size / 1024));
   dt_print_nts(DT_DEBUG_OPENCL,
                "   ADDRESS ALIGN:            %d B\n", cl->dev[dev].alignsize / 8);
   dt_print_nts(DT_DEBUG_OPENCL,
@@ -961,10 +969,10 @@ static gboolean _opencl_device_init(dt_opencl_t *cl,
                "   EVENTS HANDLED:           %s\n", STR_YESNO(cl->dev[dev].use_events));
   dt_print_nts(DT_DEBUG_OPENCL,
                "   OPENCL FAST MODE:         %s\n", STR_YESNO(fastopencl));
-  dt_print_nts(DT_DEBUG_OPENCL,
-               "   TILING ADVANTAGE:         %.3f\n", cl->dev[dev].advantage);
   dt_print_nts(DT_DEBUG_OPENCL,
                "   DEFAULT DEVICE:           %s\n", STR_YESNO(type & CL_DEVICE_TYPE_DEFAULT));
+  dt_print_nts(DT_DEBUG_OPENCL,
+               "   AVOIDED MODULES:          %s\n", cl->dev[dev].avoid ? cl->dev[dev].avoid : "none");
 
   if(cl->dev[dev].disabled)
   {
@@ -1221,6 +1229,7 @@ static void _cleanup_cl_device_mem(dt_opencl_t *cl, const int i)
   free((void *)(cl->dev[i].cname));
   free((void *)(cl->dev[i].options));
   free((void *)(cl->dev[i].cflags));
+  g_free((void *)(cl->dev[i].avoid));
 }
 
 void dt_opencl_init(dt_opencl_t *cl,
@@ -1607,7 +1616,7 @@ void dt_opencl_init(dt_opencl_t *cl,
     dt_opencl_scheduling_profile_t profile = _opencl_get_scheduling_profile();
     _opencl_apply_scheduling_profile(profile);
 
-    // let's keep track on unified memory devices
+    // let's report unified memory per device
     dt_sys_resources_t *res = &darktable.dtresources;
     for(int i = 0; i < cl->num_devs; i++)
     {
@@ -3598,9 +3607,9 @@ void dt_opencl_memory_statistics(int devid,
   {
     dt_print(DT_DEBUG_OPENCL,"[opencl memory] device '%s' id=%d: %.1fMB in use, %.1fMB available GPU mem of %.1fMB",
              cl->dev[devid].fullname, devid,
-             (float)cl->dev[devid].memory_in_use/(1024*1024),
-             (float)cl->dev[devid].used_available/(1024*1024),
-             (float)cl->dev[devid].max_global_mem/(1024*1024));
+             (float)cl->dev[devid].memory_in_use / DT_MEGA,
+             (float)cl->dev[devid].used_available / DT_MEGA,
+             (float)cl->dev[devid].max_global_mem / DT_MEGA);
       if(cl->dev[devid].memory_in_use > darktable.opencl->dev[devid].used_available)
       {
         dt_print(DT_DEBUG_OPENCL,
diff --git a/src/common/opencl.h b/src/common/opencl.h
index 34a188d8abaa..c4cb33d40720 100644
--- a/src/common/opencl.h
+++ b/src/common/opencl.h
@@ -151,6 +151,7 @@ typedef struct dt_opencl_device_t
   const char *cname;
   const char *options;
   const char *cflags;
+  const char *avoid;
   cl_int summary;
   size_t memory_in_use;
   size_t peak_memory;
@@ -211,8 +212,6 @@ typedef struct dt_opencl_device_t
 
   // lets keep the vendor for runtime checks
   int vendor_id;
-
-  float advantage;
 } dt_opencl_device_t;
 
 struct dt_bilateral_cl_global_t;
diff --git a/src/develop/pixelpipe_hb.c b/src/develop/pixelpipe_hb.c
index 599b0a4fc84f..0675bc4472e5 100644
--- a/src/develop/pixelpipe_hb.c
+++ b/src/develop/pixelpipe_hb.c
@@ -1694,6 +1694,13 @@ static void _opencl_dump_diff_pipe_pfm(dt_dev_pixelpipe_t *pipe,
     dt_free_align(clin);
   }
 }
+
+// TRUE if this piece's module is on the per-device list of modules that should not use OpenCL (FALSE if no device)
+static inline gboolean _avoid_cl_module(const dt_dev_pixelpipe_iop_t *piece)
+{
+  const char *avoid = piece->pipe->devid >= 0 ? darktable.opencl->dev[piece->pipe->devid].avoid : NULL;
+  return avoid && dt_str_commasubstring(avoid, piece->module->op);
+}
 #endif
 
 static inline gboolean _skip_piece_on_tags(const dt_dev_pixelpipe_iop_t *piece)
@@ -2140,7 +2147,8 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
     gboolean possible_cl =
         module->process_cl
         && piece->process_cl_ready
-        && !(dt_pipe_is_preview(pipe) && (module->flags() & IOP_FLAGS_PREVIEW_NON_OPENCL));
+        && !(dt_pipe_is_preview(pipe) && (module->flags() & IOP_FLAGS_PREVIEW_NON_OPENCL))
+        && !_avoid_cl_module(piece);
 
     const uint32_t m_bpp = MAX(in_bpp, bpp);
     const size_t m_width = MAX(roi_in.width, roi_out->width);
@@ -2154,24 +2162,6 @@ static gboolean _dev_pixelpipe_process_rec(dt_dev_pixelpipe_t *pipe,
     {
       if(!_piece_may_tile(piece))
         possible_cl = FALSE;
-
-      const float advantage = darktable.opencl->dev[pipe->devid].advantage;
-      if(possible_cl && (advantage > 0.0f))
-      {
-        const float tilemem_cl = dt_tiling_estimate_clmem(&tiling, piece,
-                                                          &roi_in, roi_out, m_bpp);
-        const float tilemem_cpu = dt_tiling_estimate_cpumem(&tiling, piece,
-                                                            &roi_in, roi_out, m_bpp);
-        if((tilemem_cpu * advantage) < tilemem_cl)
-        {
-          dt_print(DT_DEBUG_OPENCL | DT_DEBUG_TILING,
-                   "[dt_dev_pixelpipetiling_cl] [%s] estimates cpu"
-                   " advantage in `%s', (dev=%i, adv=%.2f, GPU %.2f CPU %.2f)",
-                   dt_dev_pixelpipe_type_to_str(pipe->type), module->op, pipe->devid,
-                   advantage, tilemem_cl / 1e9, tilemem_cpu / 1e9);
-          possible_cl = FALSE;
-        }
-      }
     }
 
     if(possible_cl)
diff --git a/src/develop/tiling.c b/src/develop/tiling.c
index 8a5f9d9f82e6..af24e6311d2b 100644
--- a/src/develop/tiling.c
+++ b/src/develop/tiling.c
@@ -1181,129 +1181,7 @@ void default_process_tiling(dt_iop_module_t *self,
   return;
 }
 
-float dt_tiling_estimate_cpumem(const dt_develop_tiling_t *tiling,
-                                const dt_dev_pixelpipe_iop_t *piece,
-                                const dt_iop_roi_t *const roi_in,
-                                const dt_iop_roi_t *const roi_out,
-                                const int max_bpp)
-{
-  const int m_dx = MAX(roi_in->width, roi_out->width);
-  const int m_dy = MAX(roi_in->height, roi_out->height);
-  if(dt_tiling_piece_fits_host_memory(piece, m_dx, m_dy, max_bpp, tiling->factor, tiling->overhead))
-    return (float)m_dx * m_dy * max_bpp * tiling->factor + tiling->overhead;
-
-  const float fullscale = fmaxf(roi_in->scale / roi_out->scale, sqrtf(((float)roi_in->width * roi_in->height)
-                                                              / ((float)roi_out->width * roi_out->height)));
-  float available = dt_get_available_pipe_mem(piece->pipe);
-  available = fmaxf(available - ((float)roi_out->width * roi_out->height * max_bpp)
-                   - ((float)roi_in->width * roi_in->height * max_bpp) - tiling->overhead, 0.0f);
-
-  float singlebuffer = dt_get_singlebuffer_mem();
-  const float factor = fmaxf(tiling->factor, 1.0f);
-  const float maxbuf = fmaxf(tiling->maxbuf, 1.0f);
-  singlebuffer = fmaxf(available / factor, singlebuffer);
-
-  int width = MAX(roi_in->width, roi_out->width);
-  int height = MAX(roi_in->height, roi_out->height);
-
-  const unsigned int align = tiling->align;
-  if((float)width * height * max_bpp * maxbuf > singlebuffer)
-  {
-    const float scale = singlebuffer / ((float)width * height * max_bpp * maxbuf);
-    if(width < height && scale >= 0.333f)
-       height = _align_down((int)floorf(height * scale), align);
-    else if(height <= width && scale >= 0.333f)
-      width = _align_down((int)floorf(width * scale), align);
-    else
-    {
-      width = _align_down((int)floorf(width * sqrtf(scale)), align);
-      height = _align_down((int)floorf(height * sqrtf(scale)), align);
-    }
-  }
-
-  if(3 * tiling->overlap > width || 3 * tiling->overlap > height)
-    width = height = _align_down((int)floorf(sqrtf((float)width * height)), align);
-  const int overlap_in = _align_up(tiling->overlap, align);
-  const int overlap_out = ceilf((float)overlap_in / fullscale);
-
-  int tiles_x = 1, tiles_y = 1;
-
-  if(roi_in->width > roi_out->width)
-    tiles_x = (width < roi_in->width) ? ceilf((float)roi_in->width / (float)MAX(width - 2 * overlap_in, 1)) : 1;
-  else
-    tiles_x = (width < roi_out->width) ? ceilf((float)roi_out->width / (float)MAX(width - 2 * overlap_out, 1)) : 1;
-
-  if(roi_in->height > roi_out->height)
-    tiles_y = (height < roi_in->height) ? ceilf((float)roi_in->height / (float)MAX(height - 2 * overlap_in, 1)) : 1;
-  else
-    tiles_y = (height < roi_out->height) ? ceilf((float)roi_out->height / (float)MAX(height - 2 * overlap_out, 1)) : 1;
-  dt_print(DT_DEBUG_TILING, "tilex = %i, tiley = %i", tiles_x, tiles_y);
-  return (float)tiles_x * tiles_y * singlebuffer ;
-}
-
 #ifdef HAVE_OPENCL
-float dt_tiling_estimate_clmem(const dt_develop_tiling_t *tiling,
-                               const dt_dev_pixelpipe_iop_t *piece,
-                               const dt_iop_roi_t *const roi_in,
-                               const dt_iop_roi_t *const roi_out,
-                               const int max_bpp)
-{
-  const int devid = piece->pipe->devid;
-  const float fullscale = fmaxf(roi_in->scale / roi_out->scale, sqrtf(((float)roi_in->width * roi_in->height)
-                                                              / ((float)roi_out->width * roi_out->height)));
-  const gboolean use_pinned_memory = dt_opencl_use_pinned_memory(devid);
-  /* If using pinned transfer on devices with dedicated GPU mem there is an additional
-     mem pressure as they will allocate also on device as cache for performance
-  */
-  const float pinned_buffer_overhead = use_pinned_memory && !dt_opencl_unified_memory(devid) ? 2.0f : 0.0f;
-  const float pinned_buffer_slack = use_pinned_memory ? 0.85f : 1.0f;
-  const float available = (float)dt_opencl_get_device_available(devid);
-  const float factor = fmaxf(tiling->factor_cl + pinned_buffer_overhead, 1.0f);
-  const float singlebuffer = fminf(fmaxf((available - tiling->overhead) / factor, 0.0f),
-                                  pinned_buffer_slack * (float)(dt_opencl_get_device_memalloc(devid)));
-  const float maxbuf = fmaxf(tiling->maxbuf_cl, 1.0f);
-
-  int width = MIN(MAX(roi_in->width, roi_out->width), darktable.opencl->dev[devid].max_image_width);
-  int height = MIN(MAX(roi_in->height, roi_out->height), darktable.opencl->dev[devid].max_image_height);
-
-  const unsigned int align = _lcm(tiling->align, dt_opencl_tiling_align(devid));
-
-  if((float)width * height * max_bpp * maxbuf > singlebuffer)
-  {
-    const float scale = singlebuffer / ((float)width * height * max_bpp * maxbuf);
-
-    if(width < height && scale >= 0.333f)
-       height = _align_down((int)floorf(height * scale), align);
-     else if(height <= width && scale >= 0.333f)
-       width = _align_down((int)floorf(width * scale), align);
-     else
-    {
-      width = _align_down((int)floorf(width * sqrtf(scale)), align);
-      height = _align_down((int)floorf(height * sqrtf(scale)), align);
-    }
-  }
-
-  if(3 * tiling->overlap > width || 3 * tiling->overlap > height)
-    width = height = _align_down((int)floorf(sqrtf((float)width * height)), align);
-
-  const int overlap_in = _align_up(tiling->overlap, align);
-  const int overlap_out = ceilf((float)overlap_in / fullscale);
-
-  int tiles_x = 1, tiles_y = 1;
-
-  if(roi_in->width > roi_out->width)
-    tiles_x = (width < roi_in->width) ? ceilf((float)roi_in->width / (float)MAX(width - 2 * overlap_in, 1)) : 1;
-  else
-    tiles_x = (width < roi_out->width) ? ceilf((float)roi_out->width / (float)MAX(width - 2 * overlap_out, 1)) : 1;
-
-  if(roi_in->height > roi_out->height)
-    tiles_y = (height < roi_in->height) ? ceilf((float)roi_in->height / (float)MAX(height - 2 * overlap_in, 1)) : 1;
-  else
-    tiles_y = (height < roi_out->height) ? ceilf((float)roi_out->height / (float)MAX(height - 2 * overlap_out, 1)) : 1;
-
-  return (float)tiles_x * tiles_y * singlebuffer * factor;
-}
-
 /* simple tiling algorithm for roi_in == roi_out, i.e. for pixel to pixel modules/operations */
 static int _default_process_tiling_cl_ptp(dt_iop_module_t *self,
                                           dt_dev_pixelpipe_iop_t *piece,
diff --git a/src/develop/tiling.h b/src/develop/tiling.h
index 8e5100f3fe2b..774e229b7ede 100644
--- a/src/develop/tiling.h
+++ b/src/develop/tiling.h
@@ -71,15 +71,6 @@ void tiling_callback(struct dt_iop_module_t *self, struct dt_dev_pixelpipe_iop_t
 gboolean dt_tiling_piece_fits_host_memory(const struct dt_dev_pixelpipe_iop_t *piece, const size_t width, const size_t height, const unsigned bpp,
                                      const float factor, const size_t overhead);
 
-float dt_tiling_estimate_cpumem(const dt_develop_tiling_t *tiling, const struct dt_dev_pixelpipe_iop_t *piece,
-                                        const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
-                                        const int max_bpp);
-
-#ifdef HAVE_OPENCL
-float dt_tiling_estimate_clmem(const dt_develop_tiling_t *tiling, const struct dt_dev_pixelpipe_iop_t *piece,
-                                          const dt_iop_roi_t *const roi_in, const dt_iop_roi_t *const roi_out,
-                                          const int max_bpp);
-#endif
 // clang-format off
 // modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
 // vim: shiftwidth=2 expandtab tabstop=2 cindent