diff --git a/src/Target.cpp b/src/Target.cpp
index 8310bb6ccd33..4513a6feb42a 100644
--- a/src/Target.cpp
+++ b/src/Target.cpp
@@ -1304,17 +1304,28 @@ int Target::get_arm_v8_lower_bound() const {
 }
 
 bool Target::supports_type(const Type &t) const {
+    if (has_feature(Vulkan)) {
+        if (t.is_float() && t.bits() == 64) {
+            return has_feature(Target::VulkanFloat64);
+        } else if (t.is_float() && t.bits() == 16) {
+            return has_feature(Target::VulkanFloat16);
+        } else if (t.is_int_or_uint() && t.bits() == 64) {
+            return has_feature(Target::VulkanInt64);
+        } else if (t.is_int_or_uint() && t.bits() == 16) {
+            return has_feature(Target::VulkanInt16);
+        } else if (t.is_int_or_uint() && t.bits() == 8) {
+            return has_feature(Target::VulkanInt8);
+        }
+    }
     if (t.bits() == 64) {
         if (t.is_float()) {
             return (!has_feature(Metal) &&
                     !has_feature(D3D12Compute) &&
                     (!has_feature(Target::OpenCL) || has_feature(Target::CLDoubles)) &&
-                    (!has_feature(Vulkan) || has_feature(Target::VulkanFloat64)) &&
                     !has_feature(WebGPU));
         } else {
             return (!has_feature(Metal) &&
                     !has_feature(D3D12Compute) &&
-                    (!has_feature(Vulkan) || has_feature(Target::VulkanInt64)) &&
                     !has_feature(WebGPU));
         }
     }
diff --git a/src/runtime/internal/memory_resources.h b/src/runtime/internal/memory_resources.h
index 0be6041519a1..e543d0183ffc 100644
--- a/src/runtime/internal/memory_resources.h
+++ b/src/runtime/internal/memory_resources.h
@@ -151,7 +151,7 @@ ALWAYS_INLINE size_t aligned_size(size_t offset, size_t size, size_t alignment)
 ALWAYS_INLINE size_t conform_size(size_t offset, size_t size, size_t alignment, size_t nearest_multiple) {
     size_t adjusted_size = aligned_size(offset, size, alignment);
     adjusted_size = (alignment > adjusted_size) ? alignment : adjusted_size;
-    if (nearest_multiple > 0) {
+    if ((nearest_multiple > 0) && ((adjusted_size % nearest_multiple) != 0)) {
         size_t rounded_size = (((adjusted_size + nearest_multiple - 1) / nearest_multiple) * nearest_multiple);
         return rounded_size;
     } else {
diff --git a/src/runtime/internal/region_allocator.h b/src/runtime/internal/region_allocator.h
index 6f39991ff79c..e52eb71171de 100644
--- a/src/runtime/internal/region_allocator.h
+++ b/src/runtime/internal/region_allocator.h
@@ -74,7 +74,7 @@ class RegionAllocator {
     BlockRegion *coalesce_block_regions(void *user_context, BlockRegion *region);
 
     // Returns true if the given region can be split to accomodate the given size
-    bool can_split(const BlockRegion *region, const MemoryRequest &request) const;
+    bool can_split(void *user_context, const BlockRegion *region, const MemoryRequest &request) const;
 
     // Splits the given block region into a smaller region to accomodate the given size, followed by empty space for the remaining
     BlockRegion *split_block_region(void *user_context, BlockRegion *region, const MemoryRequest &request);
@@ -195,7 +195,7 @@ MemoryRegion *RegionAllocator::reserve(void *user_context, const MemoryRequest &
         return nullptr;
     }
 
-    if (can_split(block_region, region_request)) {
+    if (can_split(user_context, block_region, region_request)) {
 #ifdef DEBUG_RUNTIME_INTERNAL
         debug(user_context) << "RegionAllocator: Splitting region of size ( " << (int32_t)(block_region->memory.size) << ") "
                             << "to accomodate requested size (" << (int32_t)(region_request.size) << " bytes)";
@@ -443,8 +443,30 @@ BlockRegion *RegionAllocator::coalesce_block_regions(void *user_context, BlockRe
     return block_region;
 }
 
-bool RegionAllocator::can_split(const BlockRegion *block_region, const MemoryRequest &split_request) const {
-    return (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0));
+bool RegionAllocator::can_split(void *user_context, const BlockRegion *block_region, const MemoryRequest &split_request) const {
+
+    // See if we can actually split the block region and create empty space big enough
+    if (block_region && (block_region->memory.size > split_request.size) && (block_region->usage_count == 0)) {
+
+        // We can only split if there's still room left after conforming the allocation request since the
+        // conform method may actually grow the requested size to accommodate alignment constraints
+        MemoryRequest test_request = split_request;
+        test_request.size = block_region->memory.size - test_request.size;
+        test_request.offset = block_region->memory.offset + test_request.size;
+        int error_code = conform(user_context, &test_request);
+        if (error_code) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+            debug(user_context) << "RegionAllocator: Failed to conform test request for splitting block region!\n";
+#endif
+            return false;
+        }
+
+        // NOTE: compare directly (sizes are unsigned) to avoid underflow if conform() grew the request beyond the block size
+        if (block_region->memory.size > test_request.size) {
+            return true;
+        }
+    }
+    return false;
 }
 
 BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion *block_region, const MemoryRequest &request) {
@@ -470,8 +491,9 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion
 
 #ifdef DEBUG_RUNTIME_INTERNAL
     debug(user_context) << "RegionAllocator: Splitting "
-                        << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) "
-                        << "to create empty region (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)";
+                        << "current region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes) into ...\n\t"
+                        << "existing region (offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size - split_request.size) << " bytes)\n\t"
+                        << "empty region    (offset=" << (int32_t)split_request.offset << " size=" << (int32_t)(split_request.size) << " bytes)\n";
 #endif
     BlockRegion *next_region = block_region->next_ptr;
     BlockRegion *empty_region = create_block_region(user_context, split_request);
@@ -484,6 +506,12 @@ BlockRegion *RegionAllocator::split_block_region(void *user_context, BlockRegion
     empty_region->prev_ptr = block_region;
     block_region->next_ptr = empty_region;
     block_region->memory.size -= empty_region->memory.size;
+
+#ifdef DEBUG_RUNTIME_INTERNAL
+    debug(user_context) << "RegionAllocator: Split block region into ...\n\t"
+                        << "existing region (ptr=" << (void *)block_region << " prev_ptr=" << block_region->prev_ptr << " next_ptr=" << block_region->next_ptr << " offset=" << (int32_t)block_region->memory.offset << " size=" << (int32_t)(block_region->memory.size) << " bytes)\n\t"
+                        << "empty region    (ptr=" << (void *)empty_region << " prev_ptr=" << empty_region->prev_ptr << " next_ptr=" << empty_region->next_ptr << " offset=" << (int32_t)empty_region->memory.offset << " size=" << (int32_t)(empty_region->memory.size) << " bytes)\n";
+#endif
     return empty_region;
 }
 
@@ -605,8 +633,22 @@ int RegionAllocator::alloc_block_region(void *user_context, BlockRegion *block_r
 #endif
     halide_abort_if_false(user_context, allocators.region.allocate != nullptr);
     halide_abort_if_false(user_context, block_region->status == AllocationStatus::Available);
+
     int error_code = 0;
     MemoryRegion *memory_region = &(block_region->memory);
+    if (memory_region->size <= 0) {
+#ifdef DEBUG_RUNTIME_INTERNAL
+        debug(user_context) << "    skipping zero size region ("
+                            << "block_ptr=" << (void *)block_region->block_ptr << " "
+                            << "block_region=" << (void *)block_region << " "
+                            << "memory_offset=" << (uint32_t)(block_region->memory.offset) << " "
+                            << "memory_size=" << (uint32_t)(block_region->memory.size) << " "
+                            << "block_reserved=" << (uint32_t)block->reserved << " "
+                            << ")\n";
+#endif
+        return error_code;
+    }
+
     if (memory_region->handle == nullptr) {
         error_code = allocators.region.allocate(user_context, memory_region);
         memory_region->is_owner = true;
diff --git a/src/runtime/vulkan.cpp b/src/runtime/vulkan.cpp
index 44bb40f99fa2..ca0473e80b91 100644
--- a/src/runtime/vulkan.cpp
+++ b/src/runtime/vulkan.cpp
@@ -1189,13 +1189,6 @@ WEAK int halide_vulkan_run(void *user_context,
                 }
             }
         }
-
-        // 2b. Create the pipeline layout
-        error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
-        if (error_code != halide_error_code_success) {
-            error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
-            return error_code;
-        }
     }
 
     VulkanDispatchData dispatch_data = {};
@@ -1209,16 +1202,8 @@ WEAK int halide_vulkan_run(void *user_context,
 
     VulkanShaderBinding *entry_point_binding = (shader_module->shader_bindings + entry_point_index);
 
-    // 2c. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
-    error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
-    if (error_code != halide_error_code_success) {
-        error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
-        return error_code;
-    }
-
-    // 2d. Create a descriptor set
-    if (entry_point_binding->descriptor_set == VK_NULL_HANDLE) {
-
+    // 2c. If Push Descriptor Set isn't supported, then allocate a descriptor set
+    if ((vkCmdPushDescriptorSetKHR == nullptr) && (entry_point_binding->descriptor_set == VK_NULL_HANDLE)) {
         // Construct a descriptor pool
         //
         // NOTE: while this could be re-used across multiple pipelines, we only know the storage requirements of this kernel's
@@ -1240,7 +1225,7 @@ WEAK int halide_vulkan_run(void *user_context,
         }
     }
 
-    // 3a. Create a buffer for the scalar parameters
+    // 2d. Create a buffer for the scalar parameters
     if ((entry_point_binding->args_region == nullptr) && entry_point_binding->uniform_buffer_count) {
         size_t scalar_buffer_size = vk_estimate_scalar_uniform_buffer_size(user_context, arg_sizes, args, arg_is_buffer);
         if (scalar_buffer_size > 0) {
@@ -1252,7 +1237,7 @@ WEAK int halide_vulkan_run(void *user_context,
         }
     }
 
-    // 3b. Update uniform buffer with scalar parameters
+    // 2e. Update uniform buffer with scalar parameters
     VkBuffer *args_buffer = nullptr;
     if ((entry_point_binding->args_region != nullptr) && entry_point_binding->uniform_buffer_count) {
         error_code = vk_update_scalar_uniform_buffer(user_context, ctx.allocator, entry_point_binding->args_region, arg_sizes, args, arg_is_buffer);
@@ -1268,10 +1253,28 @@ WEAK int halide_vulkan_run(void *user_context,
         }
     }
 
-    // 3c. Update buffer bindings for descriptor set
-    error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
+    // 2f. If Push Descriptor Set isn't supported, then update the buffer bindings for the allocated descriptor set
+    if (vkCmdPushDescriptorSetKHR == nullptr) {
+        error_code = vk_update_descriptor_set(user_context, ctx.allocator, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer, entry_point_binding->descriptor_set);
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to update descriptor set!\n";
+            return error_code;
+        }
+    }
+
+    // 2g. Create the pipeline layout (once per shader module)
+    if (shader_module->pipeline_layout == VK_NULL_HANDLE) {
+        error_code = vk_create_pipeline_layout(user_context, ctx.allocator, shader_module->shader_count, shader_module->descriptor_set_layouts, &(shader_module->pipeline_layout));
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to create pipeline layout!\n";
+            return error_code;
+        }
+    }
+
+    // 3. Setup the compute pipeline (eg override any specializations for shared mem or workgroup size)
+    error_code = vk_setup_compute_pipeline(user_context, ctx.allocator, entry_point_binding, &dispatch_data, shader_module->shader_module, shader_module->pipeline_layout, &(entry_point_binding->compute_pipeline));
     if (error_code != halide_error_code_success) {
-        error(user_context) << "Vulkan: Failed to update descriptor set!\n";
+        error(user_context) << "Vulkan: Failed to setup compute pipeline!\n";
         return error_code;
     }
 
@@ -1283,18 +1286,49 @@ WEAK int halide_vulkan_run(void *user_context,
     }
 
     // 5. Fill the command buffer
-    error_code = vk_fill_command_buffer_with_dispatch_call(user_context,
-                                                           ctx.device, cmds.command_buffer,
-                                                           entry_point_binding->compute_pipeline,
-                                                           shader_module->pipeline_layout,
-                                                           entry_point_binding->descriptor_set,
-                                                           entry_point_index,
-                                                           blocksX, blocksY, blocksZ);
+    error_code = vk_begin_command_buffer(user_context, cmds.command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to start command buffer for dispatch call!\n";
+        return error_code;
+    }
+    error_code = vk_bind_pipeline(user_context, cmds.command_buffer, entry_point_binding->compute_pipeline);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to bind compute pipeline to command buffer for dispatch call!\n";
+        return error_code;
+    }
+
+    if (vkCmdPushDescriptorSetKHR != nullptr) {
+        error_code = vk_push_descriptor_set(user_context, ctx.allocator, cmds.command_buffer, entry_point_binding->compute_pipeline, shader_module->pipeline_layout, entry_point_binding->descriptor_set, args_buffer, entry_point_binding->uniform_buffer_count, entry_point_binding->storage_buffer_count, arg_sizes, args, arg_is_buffer);
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to update descriptor set!\n";
+            return error_code;
+        }
+    } else {
+        error_code = vk_bind_descriptor_sets(user_context, cmds.command_buffer, shader_module->pipeline_layout, entry_point_binding->descriptor_set, entry_point_index);
+        if (error_code != halide_error_code_success) {
+            error(user_context) << "Vulkan: Failed to bind descriptor set to command buffer for dispatch call!\n";
+            return error_code;
+        }
+    }
+
+    error_code = vk_dispatch_kernel(user_context,
+                                    ctx.device, cmds.command_buffer,
+                                    entry_point_binding->compute_pipeline,
+                                    shader_module->pipeline_layout,
+                                    entry_point_binding->descriptor_set,
+                                    entry_point_index,
+                                    blocksX, blocksY, blocksZ);
     if (error_code != halide_error_code_success) {
         error(user_context) << "Vulkan: Failed to fill command buffer with dispatch call!\n";
         return error_code;
     }
 
+    error_code = vk_end_command_buffer(user_context, cmds.command_buffer);
+    if (error_code != halide_error_code_success) {
+        error(user_context) << "Vulkan: Failed to end command buffer for dispatch call!\n";
+        return error_code;
+    }
+
     // 6. Submit the command buffer to our command queue
     error_code = vk_submit_command_buffer(user_context, ctx.queue, cmds.command_buffer);
     if (error_code != halide_error_code_success) {
diff --git a/src/runtime/vulkan_extensions.h b/src/runtime/vulkan_extensions.h
index c2e4ea42ff1e..68fa3662c39e 100644
--- a/src/runtime/vulkan_extensions.h
+++ b/src/runtime/vulkan_extensions.h
@@ -203,10 +203,18 @@ uint32_t vk_get_required_device_extensions(void *user_context, StringTable &ext_
 uint32_t vk_get_optional_device_extensions(void *user_context, StringTable &ext_table) {
     const char *optional_ext_table[] = {
         "VK_KHR_portability_subset",  //< necessary for running under Molten (aka Vulkan on Mac)
+        VK_KHR_MAINTENANCE_1_EXTENSION_NAME,
+        VK_KHR_MAINTENANCE_2_EXTENSION_NAME,
+        VK_KHR_MAINTENANCE_3_EXTENSION_NAME,
+        VK_KHR_MAINTENANCE_4_EXTENSION_NAME,
         VK_KHR_MAINTENANCE_5_EXTENSION_NAME,
+        VK_KHR_MAINTENANCE_6_EXTENSION_NAME,
+        VK_KHR_MAINTENANCE_7_EXTENSION_NAME,
         VK_KHR_16BIT_STORAGE_EXTENSION_NAME,
         VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
-        VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME};
+        VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME,
+        VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME};
+
     const uint32_t optional_ext_count = sizeof(optional_ext_table) / sizeof(optional_ext_table[0]);
     ext_table.fill(user_context, (const char **)optional_ext_table, optional_ext_count);
     return optional_ext_count;
diff --git a/src/runtime/vulkan_functions.h b/src/runtime/vulkan_functions.h
index 90938896f4cc..1339e0818a03 100644
--- a/src/runtime/vulkan_functions.h
+++ b/src/runtime/vulkan_functions.h
@@ -204,6 +204,7 @@ VULKAN_FN(vkCmdCopyBuffer2)
 // VULKAN_FN(vkCmdCopyImageToBuffer2)
 // VULKAN_FN(vkCmdEndRendering)
 VULKAN_FN(vkCmdPipelineBarrier2)
+VULKAN_FN(vkCmdPushDescriptorSetKHR)
 VULKAN_FN(vkCmdResetEvent2)
 // VULKAN_FN(vkCmdResolveImage2)
 // VULKAN_FN(vkCmdSetCullMode)
diff --git a/src/runtime/vulkan_internal.h b/src/runtime/vulkan_internal.h
index aeef545385cc..821db25dc98e 100644
--- a/src/runtime/vulkan_internal.h
+++ b/src/runtime/vulkan_internal.h
@@ -106,14 +106,18 @@ int vk_destroy_command_buffer(void *user_context, VulkanMemoryAllocator *allocat
 
 struct ScopedVulkanCommandBufferAndPool;
 
-int vk_fill_command_buffer_with_dispatch_call(void *user_context,
-                                              VkDevice device,
-                                              VkCommandBuffer command_buffer,
-                                              VkPipeline compute_pipeline,
-                                              VkPipelineLayout pipeline_layout,
-                                              VkDescriptorSet descriptor_set,
-                                              uint32_t descriptor_set_index,
-                                              int blocksX, int blocksY, int blocksZ);
+int vk_begin_command_buffer(void *user_context, VkCommandBuffer command_buffer);
+int vk_end_command_buffer(void *user_context, VkCommandBuffer command_buffer);
+int vk_bind_pipeline(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline);
+
+int vk_dispatch_kernel(void *user_context,
+                       VkDevice device,
+                       VkCommandBuffer command_buffer,
+                       VkPipeline compute_pipeline,
+                       VkPipelineLayout pipeline_layout,
+                       VkDescriptorSet descriptor_set,
+                       uint32_t descriptor_set_index,
+                       int blocksX, int blocksY, int blocksZ);
 
 int vk_submit_command_buffer(void *user_context, VkQueue queue, VkCommandBuffer command_buffer);
 
@@ -175,6 +180,24 @@ int vk_create_descriptor_set(void *user_context,
                              VkDescriptorPool descriptor_pool,
                              VkDescriptorSet *descriptor_set);
 
+int vk_get_descriptor_buffer_info(void *user_context,
+                                  VulkanMemoryAllocator *allocator,
+                                  VkDescriptorSet descriptor_set,
+                                  VkBuffer *scalar_args_buffer,
+                                  size_t uniform_buffer_count,
+                                  size_t storage_buffer_count,
+                                  size_t arg_sizes[],
+                                  void *args[],
+                                  int8_t arg_is_buffer[],
+                                  BlockStorage *descriptor_buffer_info_result);
+
+int vk_get_write_descriptor_set_info(void *user_context,
+                                     VulkanMemoryAllocator *allocator,
+                                     BlockStorage *descriptor_buffer_info,
+                                     VkDescriptorSet descriptor_set,
+                                     VkBuffer *scalar_args_buffer,
+                                     BlockStorage *write_descriptor_set_result);
+
 int vk_update_descriptor_set(void *user_context,
                              VulkanMemoryAllocator *allocator,
                              VkBuffer *scalar_args_buffer,
@@ -185,6 +208,25 @@ int vk_update_descriptor_set(void *user_context,
                              int8_t arg_is_buffer[],
                              VkDescriptorSet descriptor_set);
 
+int vk_bind_descriptor_sets(void *user_context,
+                            VkCommandBuffer command_buffer,
+                            VkPipelineLayout pipeline_layout,
+                            VkDescriptorSet descriptor_set,
+                            uint32_t descriptor_set_index);
+
+int vk_push_descriptor_set(void *user_context,
+                           VulkanMemoryAllocator *allocator,
+                           VkCommandBuffer command_buffer,
+                           VkPipeline compute_pipeline,
+                           VkPipelineLayout pipeline_layout,
+                           VkDescriptorSet descriptor_set,
+                           VkBuffer *scalar_args_buffer,
+                           size_t uniform_buffer_count,
+                           size_t storage_buffer_count,
+                           size_t arg_sizes[],
+                           void *args[],
+                           int8_t arg_is_buffer[]);
+
 // -- Pipeline Layout
 int vk_create_pipeline_layout(void *user_context,
                               VulkanMemoryAllocator *allocator,
diff --git a/src/runtime/vulkan_memory.h b/src/runtime/vulkan_memory.h
index 9b32de9a15c0..b65f076e8aad 100644
--- a/src/runtime/vulkan_memory.h
+++ b/src/runtime/vulkan_memory.h
@@ -556,7 +556,7 @@ int VulkanMemoryAllocator::lookup_requirements(void *user_context, size_t size,
 #if defined(HL_VK_DEBUG_MEM)
     debug(nullptr) << "VulkanMemoryAllocator: Looking up requirements ("
                    << "user_context=" << user_context << " "
-                   << "size=" << (uint32_t)block->size << ", "
+                   << "size=" << (uint32_t)size << ", "
                    << "usage_flags=" << usage_flags << ") ... \n";
 #endif
     VkBufferCreateInfo create_info = {
@@ -998,7 +998,7 @@ int VulkanMemoryAllocator::conform(void *user_context, MemoryRequest *request) {
 
 #if defined(HL_VK_DEBUG_MEM)
     debug(nullptr) << "VulkanMemoryAllocator: Buffer requirements ("
-                   << "requested_size=" << (uint32_t)region->size << ", "
+                   << "requested_size=" << (uint32_t)request->size << ", "
                    << "required_alignment=" << (uint32_t)memory_requirements.alignment << ", "
                    << "required_size=" << (uint32_t)memory_requirements.size << ")\n";
 #endif
@@ -1051,7 +1051,7 @@ int VulkanMemoryAllocator::conform_region_request(void *instance_ptr, MemoryRequ
 #if defined(HL_VK_DEBUG_MEM)
     debug(nullptr) << "VulkanMemoryAllocator: Conforming region request ("
                    << "user_context=" << user_context << " "
-                   << "request=" << (void *)(region) << ") ... \n";
+                   << "request=" << (void *)(request) << ") ... \n";
 #endif
 
     if ((instance->device == nullptr) || (instance->physical_device == nullptr)) {
@@ -1125,6 +1125,9 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
 
     VkResult result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer);
     if (result != VK_SUCCESS) {
+        debug(user_context) << "VulkanRegionAllocator: Failed to create buffer!\n\t"
+                            << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n";
+
         // Allocation failed ... collect unused regions and try again ...
         instance->collect(user_context);
         result = vkCreateBuffer(instance->device, &create_info, instance->alloc_callbacks, buffer);
@@ -1165,12 +1168,9 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
                                 << "vkCreateBuffer returned: " << vk_get_error_name(result) << "\n";
             return halide_error_code_device_malloc_failed;
         }
+        region->size = create_info.size;
     }
 
-#ifdef DEBUG_RUNTIME
-    debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
-#endif
-
     RegionAllocator *region_allocator = RegionAllocator::find_allocator(user_context, region);
     if (region_allocator == nullptr) {
         error(user_context) << "VulkanBlockAllocator: Unable to allocate region! Invalid region allocator!\n";
@@ -1189,6 +1189,10 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
         return halide_error_code_internal_error;
     }
 
+#ifdef DEBUG_RUNTIME
+    debug(nullptr) << "vkCreateBuffer: Created buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
+#endif
+
     // Finally, bind buffer to the device memory
     result = vkBindBufferMemory(instance->device, *buffer, *device_memory, region->offset);
     if (result != VK_SUCCESS) {
@@ -1197,6 +1201,10 @@ int VulkanMemoryAllocator::allocate_region(void *instance_ptr, MemoryRegion *reg
         return halide_error_code_generic_error;
     }
 
+#ifdef DEBUG_RUNTIME
+    debug(nullptr) << "vkBindBufferMemory: Bound memory to device buffer for device region (" << (uint64_t)region->size << " bytes) ...\n";
+#endif
+
     region->handle = (void *)buffer;
     region->is_owner = true;
     instance->region_byte_count += region->size;
diff --git a/src/runtime/vulkan_resources.h b/src/runtime/vulkan_resources.h
index e5e8f827d4b9..d0e1f109c92f 100644
--- a/src/runtime/vulkan_resources.h
+++ b/src/runtime/vulkan_resources.h
@@ -202,24 +202,11 @@ struct ScopedVulkanCommandBufferAndPool {
     }
 };
 
-int vk_fill_command_buffer_with_dispatch_call(void *user_context,
-                                              VkDevice device,
-                                              VkCommandBuffer command_buffer,
-                                              VkPipeline compute_pipeline,
-                                              VkPipelineLayout pipeline_layout,
-                                              VkDescriptorSet descriptor_set,
-                                              uint32_t descriptor_set_index,
-                                              int blocksX, int blocksY, int blocksZ) {
-
+int vk_begin_command_buffer(void *user_context, VkCommandBuffer command_buffer) {
 #ifdef DEBUG_RUNTIME
     debug(user_context)
-        << " vk_fill_command_buffer_with_dispatch_call (user_context: " << user_context << ", "
-        << "device: " << (void *)device << ", "
-        << "command_buffer: " << (void *)command_buffer << ", "
-        << "pipeline_layout: " << (void *)pipeline_layout << ", "
-        << "descriptor_set: " << (void *)descriptor_set << ", "
-        << "descriptor_set_index: " << descriptor_set_index << ", "
-        << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n";
+        << " vk_begin_command_buffer (user_context: " << user_context << ", "
+        << "command_buffer: " << (void *)command_buffer << ")\n";
 #endif
 
     VkCommandBufferBeginInfo command_buffer_begin_info = {
@@ -234,18 +221,56 @@ int vk_fill_command_buffer_with_dispatch_call(void *user_context,
         error(user_context) << "vkBeginCommandBuffer returned " << vk_get_error_name(result) << "\n";
         return halide_error_code_generic_error;
     }
+    return halide_error_code_success;
+}
 
-    vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
-    vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
-                            descriptor_set_index, 1, &descriptor_set, 0, nullptr);
-    vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ);
+int vk_end_command_buffer(void *user_context, VkCommandBuffer command_buffer) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_end_command_buffer (user_context: " << user_context << ", "
+        << "command_buffer: " << (void *)command_buffer << ")\n";
+#endif
 
-    result = vkEndCommandBuffer(command_buffer);
+    VkResult result = vkEndCommandBuffer(command_buffer);
     if (result != VK_SUCCESS) {
         error(user_context) << "vkEndCommandBuffer returned " << vk_get_error_name(result) << "\n";
         return halide_error_code_generic_error;
     }
+    return halide_error_code_success;
+}
 
+int vk_bind_pipeline(void *user_context, VkCommandBuffer command_buffer, VkPipeline compute_pipeline) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_bind_pipeline (user_context: " << user_context << ", "
+        << "command_buffer: " << (void *)command_buffer << ", "
+        << "compute_pipeline: " << (void *)compute_pipeline << ")\n";
+#endif
+    vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
+    return halide_error_code_success;
+}
+
+int vk_dispatch_kernel(void *user_context,
+                       VkDevice device,
+                       VkCommandBuffer command_buffer,
+                       VkPipeline compute_pipeline,
+                       VkPipelineLayout pipeline_layout,
+                       VkDescriptorSet descriptor_set,
+                       uint32_t descriptor_set_index,
+                       int blocksX, int blocksY, int blocksZ) {
+
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_dispatch_kernel (user_context: " << user_context << ", "
+        << "device: " << (void *)device << ", "
+        << "command_buffer: " << (void *)command_buffer << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ", "
+        << "descriptor_set_index: " << descriptor_set_index << ", "
+        << "blocks: " << blocksX << ", " << blocksY << ", " << blocksZ << ")\n";
+#endif
+
+    vkCmdDispatch(command_buffer, blocksX, blocksY, blocksZ);
     return halide_error_code_success;
 }
 
@@ -353,16 +378,14 @@ int vk_create_descriptor_pool(void *user_context,
         pool_sizes.append(user_context, &storage_buffer_size);
     }
 
-    VkDescriptorPoolCreateInfo descriptor_pool_info = {
-        VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,   // struct type
-        nullptr,                                         // point to struct extending this
-        0,                                               // flags
-        1,                                               // this pool will only be used for creating one descriptor set!
-        (uint32_t)pool_sizes.size(),                     // pool size count
-        (const VkDescriptorPoolSize *)pool_sizes.data()  // ptr to descriptr pool sizes
-    };
+    VkDescriptorPoolCreateInfo pool_create_info{};
+    pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
+    pool_create_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
+    pool_create_info.maxSets = 1;
+    pool_create_info.poolSizeCount = (uint32_t)pool_sizes.size();
+    pool_create_info.pPoolSizes = (const VkDescriptorPoolSize *)pool_sizes.data();
 
-    VkResult result = vkCreateDescriptorPool(allocator->current_device(), &descriptor_pool_info, allocator->callbacks(), descriptor_pool);
+    VkResult result = vkCreateDescriptorPool(allocator->current_device(), &pool_create_info, allocator->callbacks(), descriptor_pool);
     if (result != VK_SUCCESS) {
         error(user_context) << "Vulkan: Failed to create descriptor pool! vkCreateDescriptorPool returned " << vk_get_error_name(result) << "\n";
         return halide_error_code_generic_error;
@@ -415,52 +438,45 @@ int vk_create_descriptor_set_layout(void *user_context,
 
     // add all uniform buffers first
     for (uint32_t n = 0; n < uniform_buffer_count; ++n) {
-        VkDescriptorSetLayoutBinding uniform_buffer_layout = {
-            (uint32_t)layout_bindings.size(),   // binding index
-            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,  // descriptor type
-            1,                                  // descriptor count
-            VK_SHADER_STAGE_COMPUTE_BIT,        // stage flags
-            nullptr                             // immutable samplers
-        };
-
+        // Params will be passed as UNIFORM_BUFFERs
+        VkDescriptorSetLayoutBinding uniform_buffer_layout_binding{};
+        uniform_buffer_layout_binding.binding = (uint32_t)layout_bindings.size();  // binding index
+        uniform_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+        uniform_buffer_layout_binding.descriptorCount = 1;
+        uniform_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
 #ifdef DEBUG_RUNTIME
         debug(user_context)
             << "  [" << (uint32_t)layout_bindings.size() << "] : UNIFORM_BUFFER\n";
 #endif
-
-        layout_bindings.append(user_context, &uniform_buffer_layout);
+        layout_bindings.append(user_context, &uniform_buffer_layout_binding);
     }
 
     // Add all other storage buffers
     for (uint32_t n = 0; n < storage_buffer_count; ++n) {
-
-        // halide buffers will be passed as STORAGE_BUFFERS
-        VkDescriptorSetLayoutBinding storage_buffer_layout = {
-            (uint32_t)layout_bindings.size(),   // binding index
-            VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,  // descriptor type
-            1,                                  // descriptor count
-            VK_SHADER_STAGE_COMPUTE_BIT,        // stage flags
-            nullptr                             // immutable samplers
-        };
+        // halide buffers will be passed as STORAGE_BUFFERs
+        VkDescriptorSetLayoutBinding storage_buffer_layout_binding{};
+        storage_buffer_layout_binding.binding = (uint32_t)layout_bindings.size();  // binding index
+        storage_buffer_layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        storage_buffer_layout_binding.descriptorCount = 1;
+        storage_buffer_layout_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
 #ifdef DEBUG_RUNTIME
         debug(user_context)
             << "  [" << (uint32_t)layout_bindings.size() << "] : STORAGE_BUFFER\n";
 #endif
-
-        layout_bindings.append(user_context, &storage_buffer_layout);
+        layout_bindings.append(user_context, &storage_buffer_layout_binding);
     }
 
-    // Create the LayoutInfo struct
-    VkDescriptorSetLayoutCreateInfo layout_info = {
-        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,    // structure type
-        nullptr,                                                // pointer to a struct extending this info
-        0,                                                      // flags
-        (uint32_t)layout_bindings.size(),                       // binding count
-        (VkDescriptorSetLayoutBinding *)layout_bindings.data()  // pointer to layout bindings array
-    };
+    VkDescriptorSetLayoutCreateInfo layout_create_info{};
+    layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
+    layout_create_info.pNext = nullptr;
+    if (vkCmdPushDescriptorSetKHR != nullptr) {
+        layout_create_info.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR;
+    }
+    layout_create_info.bindingCount = (uint32_t)layout_bindings.size();
+    layout_create_info.pBindings = (VkDescriptorSetLayoutBinding *)layout_bindings.data();
 
     // Create the descriptor set layout
-    VkResult result = vkCreateDescriptorSetLayout(allocator->current_device(), &layout_info, allocator->callbacks(), layout);
+    VkResult result = vkCreateDescriptorSetLayout(allocator->current_device(), &layout_create_info, allocator->callbacks(), layout);
     if (result != VK_SUCCESS) {
         error(user_context) << "vkCreateDescriptorSetLayout returned " << vk_get_error_name(result) << "\n";
         return halide_error_code_generic_error;
@@ -524,18 +540,19 @@ int vk_create_descriptor_set(void *user_context,
     return halide_error_code_success;
 }
 
-int vk_update_descriptor_set(void *user_context,
-                             VulkanMemoryAllocator *allocator,
-                             VkBuffer *scalar_args_buffer,
-                             size_t uniform_buffer_count,
-                             size_t storage_buffer_count,
-                             size_t arg_sizes[],
-                             void *args[],
-                             int8_t arg_is_buffer[],
-                             VkDescriptorSet descriptor_set) {
+int vk_get_descriptor_buffer_info(void *user_context,
+                                  VulkanMemoryAllocator *allocator,
+                                  VkDescriptorSet descriptor_set,
+                                  VkBuffer *scalar_args_buffer,
+                                  size_t uniform_buffer_count,
+                                  size_t storage_buffer_count,
+                                  size_t arg_sizes[],
+                                  void *args[],
+                                  int8_t arg_is_buffer[],
+                                  BlockStorage *descriptor_buffer_info_result) {
 #ifdef DEBUG_RUNTIME
     debug(user_context)
-        << " vk_update_descriptor_set (user_context: " << user_context << ", "
+        << " vk_get_descriptor_buffer_info (user_context: " << user_context << ", "
         << "allocator: " << (void *)allocator << ", "
         << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", "
         << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
@@ -543,7 +560,17 @@ int vk_update_descriptor_set(void *user_context,
         << "descriptor_set: " << (void *)descriptor_set << ")\n";
 #endif
     if (allocator == nullptr) {
-        error(user_context) << "Vulkan: Failed to create descriptor set ... invalid allocator pointer!\n";
+        error(user_context) << "Vulkan: Failed to get descriptor buffer info. Invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (descriptor_buffer_info_result == nullptr) {
+        error(user_context) << "Vulkan: Failed to get descriptor buffer info. Invalid result pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (descriptor_buffer_info_result->current_config().entry_size != sizeof(VkDescriptorBufferInfo)) {
+        error(user_context) << "Vulkan: Failed to get descriptor buffer info. Invalid descriptor buffer info result!\n";
         return halide_error_code_generic_error;
     }
 
@@ -552,41 +579,20 @@ int vk_update_descriptor_set(void *user_context,
     dbi_config.entry_size = sizeof(VkDescriptorBufferInfo);
     BlockStorage descriptor_buffer_info(user_context, dbi_config);
 
-    BlockStorage::Config wds_config;
-    wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
-    wds_config.entry_size = sizeof(VkWriteDescriptorSet);
-    BlockStorage write_descriptor_set(user_context, wds_config);
-
     // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER
-    VkDescriptorBufferInfo *scalar_args_entry = nullptr;
     if (scalar_args_buffer != nullptr) {
-        VkDescriptorBufferInfo scalar_args_descriptor_buffer_info = {
-            *scalar_args_buffer,  // the buffer
-            0,                    // offset
-            VK_WHOLE_SIZE         // range
-        };
-        descriptor_buffer_info.append(user_context, &scalar_args_descriptor_buffer_info);
-        scalar_args_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back();
+        VkDescriptorBufferInfo scalar_args_buffer_info{};
+        scalar_args_buffer_info.buffer = *scalar_args_buffer;
+        scalar_args_buffer_info.offset = 0;
+        scalar_args_buffer_info.range = VK_WHOLE_SIZE;
+        descriptor_buffer_info_result->append(user_context, &scalar_args_buffer_info);
 
 #ifdef DEBUG_RUNTIME
-        debug(user_context) << "  [" << (uint32_t)write_descriptor_set.size() << "] UNIFORM_BUFFER : "
+        debug(user_context) << "  [" << (uint32_t)(descriptor_buffer_info_result->size() - 1) << "] UNIFORM_BUFFER : "
                             << "buffer=" << (void *)scalar_args_buffer << " "
                             << "offset=" << (uint32_t)(0) << " "
                             << "size=VK_WHOLE_SIZE\n";
 #endif
-        VkWriteDescriptorSet uniform_buffer_write_descriptor_set = {
-            VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  // struct type
-            nullptr,                                 // pointer to struct extending this
-            descriptor_set,                          // descriptor set to update
-            0,                                       // binding slot
-            0,                                       // array elem
-            1,                                       // num to update
-            VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,       // descriptor type
-            nullptr,                                 // for images
-            scalar_args_entry,                       // info for buffer
-            nullptr                                  // for texel buffers
-        };
-        write_descriptor_set.append(user_context, &uniform_buffer_write_descriptor_set);
     }
 
     // Add all the other device buffers as STORAGE BUFFERs
@@ -607,40 +613,243 @@ int vk_update_descriptor_set(void *user_context,
             VkDeviceSize range_offset = device_region->range.head_offset;
             VkDeviceSize range_size = device_region->size - device_region->range.head_offset - device_region->range.tail_offset;
             halide_abort_if_false(user_context, (device_region->size - device_region->range.head_offset - device_region->range.tail_offset) > 0);
-            VkDescriptorBufferInfo device_buffer_info = {
-                *device_buffer,  // the buffer
-                range_offset,    // range offset
-                range_size       // range size
-            };
-            descriptor_buffer_info.append(user_context, &device_buffer_info);
-            VkDescriptorBufferInfo *device_buffer_entry = (VkDescriptorBufferInfo *)descriptor_buffer_info.back();
+
+            VkDescriptorBufferInfo device_buffer_info{};
+            device_buffer_info.buffer = *device_buffer;
+            device_buffer_info.offset = range_offset;
+            device_buffer_info.range = range_size;
+            descriptor_buffer_info_result->append(user_context, &device_buffer_info);
 
 #ifdef DEBUG_RUNTIME
-            debug(user_context) << "  [" << (uint32_t)write_descriptor_set.size() << "] STORAGE_BUFFER : "
+            debug(user_context) << "  [" << (uint32_t)(descriptor_buffer_info_result->size() - 1) << "] STORAGE_BUFFER : "
                                 << "region=" << (void *)device_region << " "
                                 << "buffer=" << (void *)device_buffer << " "
                                 << "offset=" << (uint32_t)(range_offset) << " "
                                 << "size=" << (uint32_t)(range_size) << "\n";
 #endif
-
-            VkWriteDescriptorSet storage_buffer_write_descriptor_set = {
-                VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,  // struct type
-                nullptr,                                 // pointer to struct extending this
-                descriptor_set,                          // descriptor set to update
-                (uint32_t)write_descriptor_set.size(),   // binding slot
-                0,                                       // array elem
-                1,                                       // num to update
-                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,       // descriptor type
-                nullptr,                                 // for images
-                device_buffer_entry,                     // info for buffer
-                nullptr                                  // for texel buffers
-            };
-            write_descriptor_set.append(user_context, &storage_buffer_write_descriptor_set);
         }
     }
 
+    return halide_error_code_success;
+}
+
+int vk_get_write_descriptor_set_info(void *user_context,
+                                     VulkanMemoryAllocator *allocator,
+                                     BlockStorage *descriptor_buffer_info,
+                                     VkDescriptorSet descriptor_set,
+                                     VkBuffer *scalar_args_buffer,
+                                     BlockStorage *write_descriptor_set_result) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_get_write_descriptor_set_info (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "descriptor_buffer_info: " << (void *)descriptor_buffer_info << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ", "
+        << "scalar_args_buffer: " << (void *)scalar_args_buffer << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to fill write descriptor set ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (write_descriptor_set_result == nullptr) {
+        error(user_context) << "Vulkan: Failed to fill write descriptor set ... invalid result pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    if (write_descriptor_set_result->current_config().entry_size != sizeof(VkWriteDescriptorSet)) {
+        error(user_context) << "Vulkan: Failed to fill write descriptor set. Invalid write descriptor set result!\n";
+        return halide_error_code_generic_error;
+    }
+
+    // First binding will be the scalar args buffer (if needed) passed as a UNIFORM BUFFER
+    size_t index = 0;
+    if ((scalar_args_buffer != nullptr) && (!descriptor_buffer_info->empty())) {
+        VkWriteDescriptorSet uniform_buffer_write_entry{};
+        uniform_buffer_write_entry.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        uniform_buffer_write_entry.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
+        uniform_buffer_write_entry.dstSet = descriptor_set;
+        uniform_buffer_write_entry.dstBinding = 0;
+        uniform_buffer_write_entry.dstArrayElement = 0;
+        uniform_buffer_write_entry.descriptorCount = 1;
+        uniform_buffer_write_entry.pBufferInfo = (VkDescriptorBufferInfo *)(*descriptor_buffer_info)[index++];
+        write_descriptor_set_result->append(user_context, &uniform_buffer_write_entry);
+    }
+
+    // Add all the other device buffers as STORAGE BUFFERs
+    while (index < descriptor_buffer_info->size()) {
+        VkWriteDescriptorSet storage_buffer_write_entry{};
+        storage_buffer_write_entry.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
+        storage_buffer_write_entry.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
+        storage_buffer_write_entry.dstSet = descriptor_set;
+        storage_buffer_write_entry.dstBinding = (uint32_t)write_descriptor_set_result->size();
+        storage_buffer_write_entry.dstArrayElement = 0;
+        storage_buffer_write_entry.descriptorCount = 1;
+        storage_buffer_write_entry.pBufferInfo = (VkDescriptorBufferInfo *)(*descriptor_buffer_info)[index++];
+        write_descriptor_set_result->append(user_context, &storage_buffer_write_entry);
+    }
+
+    return halide_error_code_success;
+}
+
+int vk_update_descriptor_set(void *user_context,
+                             VulkanMemoryAllocator *allocator,
+                             VkBuffer *scalar_args_buffer,
+                             size_t uniform_buffer_count,
+                             size_t storage_buffer_count,
+                             size_t arg_sizes[],
+                             void *args[],
+                             int8_t arg_is_buffer[],
+                             VkDescriptorSet descriptor_set) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_update_descriptor_set (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", "
+        << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
+        << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to update descriptor set ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    BlockStorage::Config dbi_config;
+    dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
+    dbi_config.entry_size = sizeof(VkDescriptorBufferInfo);
+    BlockStorage descriptor_buffer_info(user_context, dbi_config);
+
+    int error_code = vk_get_descriptor_buffer_info(
+        user_context,
+        allocator,
+        descriptor_set,
+        scalar_args_buffer,
+        uniform_buffer_count,
+        storage_buffer_count,
+        arg_sizes,
+        args,
+        arg_is_buffer,
+        &descriptor_buffer_info);
+
+    if (error_code) {
+        error(user_context) << "Vulkan: Failed to update descriptor set ... get descriptor buffer info failed!\n";
+        return halide_error_code_generic_error;
+    }
+
+    BlockStorage::Config wds_config;
+    wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
+    wds_config.entry_size = sizeof(VkWriteDescriptorSet);
+    BlockStorage write_descriptor_set(user_context, wds_config);
+
+    error_code = vk_get_write_descriptor_set_info(
+        user_context,
+        allocator,
+        &descriptor_buffer_info,
+        descriptor_set,
+        scalar_args_buffer,
+        &write_descriptor_set);
+
+    if (error_code) {
+        error(user_context) << "Vulkan: Failed to update descriptor set ... write descriptor set creation failed!\n";
+        return halide_error_code_generic_error;
+    }
+
+    // issue the update call to populate the descriptor set
+    uint32_t update_size = (uint32_t)write_descriptor_set.size();
+    const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data();
+    vkUpdateDescriptorSets(allocator->current_device(), update_size, update_data, 0, nullptr);
+    return halide_error_code_success;
+}
+
+int vk_bind_descriptor_sets(void *user_context,
+                            VkCommandBuffer command_buffer,
+                            VkPipelineLayout pipeline_layout,
+                            VkDescriptorSet descriptor_set,
+                            uint32_t descriptor_set_index) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_bind_descriptor_sets (user_context: " << user_context << ", "
+        << "command_buffer: " << (void *)command_buffer << ", "
+        << "pipeline_layout: " << (void *)pipeline_layout << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ", "
+        << "descriptor_set_index: " << descriptor_set_index << ")\n";
+#endif
+    vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout,
+                            descriptor_set_index, 1, &descriptor_set, 0, nullptr);
+    return halide_error_code_success;
+}
+
+int vk_push_descriptor_set(void *user_context,
+                           VulkanMemoryAllocator *allocator,
+                           VkCommandBuffer command_buffer,
+                           VkPipeline compute_pipeline,
+                           VkPipelineLayout pipeline_layout,
+                           VkDescriptorSet descriptor_set,
+                           VkBuffer *scalar_args_buffer,
+                           size_t uniform_buffer_count,
+                           size_t storage_buffer_count,
+                           size_t arg_sizes[],
+                           void *args[],
+                           int8_t arg_is_buffer[]) {
+#ifdef DEBUG_RUNTIME
+    debug(user_context)
+        << " vk_push_descriptor_set (user_context: " << user_context << ", "
+        << "allocator: " << (void *)allocator << ", "
+        << "scalar_args_buffer: " << (void *)scalar_args_buffer << ", "
+        << "uniform_buffer_count: " << (uint32_t)uniform_buffer_count << ", "
+        << "storage_buffer_count: " << (uint32_t)storage_buffer_count << ", "
+        << "descriptor_set: " << (void *)descriptor_set << ")\n";
+#endif
+    if (allocator == nullptr) {
+        error(user_context) << "Vulkan: Failed to push descriptor set ... invalid allocator pointer!\n";
+        return halide_error_code_generic_error;
+    }
+
+    BlockStorage::Config dbi_config;
+    dbi_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
+    dbi_config.entry_size = sizeof(VkDescriptorBufferInfo);
+    BlockStorage descriptor_buffer_info(user_context, dbi_config);
+
+    int error_code = vk_get_descriptor_buffer_info(
+        user_context,
+        allocator,
+        descriptor_set,
+        scalar_args_buffer,
+        uniform_buffer_count,
+        storage_buffer_count,
+        arg_sizes,
+        args,
+        arg_is_buffer,
+        &descriptor_buffer_info);
+
+    if (error_code) {
+        error(user_context) << "Vulkan: Failed to push descriptor set ... get descriptor buffer info failed!\n";
+        return halide_error_code_generic_error;
+    }
+
+    BlockStorage::Config wds_config;
+    wds_config.minimum_capacity = storage_buffer_count + uniform_buffer_count;
+    wds_config.entry_size = sizeof(VkWriteDescriptorSet);
+    BlockStorage write_descriptor_set(user_context, wds_config);
+
+    error_code = vk_get_write_descriptor_set_info(
+        user_context,
+        allocator,
+        &descriptor_buffer_info,
+        descriptor_set,
+        scalar_args_buffer,
+        &write_descriptor_set);
+
+    if (error_code) {
+        error(user_context) << "Vulkan: Failed to push descriptor set ... write descriptor set creation failed!\n";
+        return halide_error_code_generic_error;
+    }
+
     // issue the update call to populate the descriptor set
-    vkUpdateDescriptorSets(allocator->current_device(), (uint32_t)write_descriptor_set.size(), (const VkWriteDescriptorSet *)write_descriptor_set.data(), 0, nullptr);
+    uint32_t update_size = (uint32_t)write_descriptor_set.size();
+    const VkWriteDescriptorSet *update_data = (const VkWriteDescriptorSet *)write_descriptor_set.data();
+    vkCmdPushDescriptorSetKHR(command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, update_size, update_data);
     return halide_error_code_success;
 }
 
@@ -1665,7 +1874,7 @@ void vk_destroy_compilation_cache_entry(VulkanCompilationCacheEntry *cache_entry
     debug(user_context)
         << " vk_destroy_compilation_cache_entry (cache_entry: " << cache_entry << ")\n";
 
-    if (cache_entry == nullptr) {
+    if ((cache_entry == nullptr) || (cache_entry->compiled_modules == nullptr)) {
         return;
     }
 
diff --git a/test/autoschedulers/mullapudi2016/histogram.cpp b/test/autoschedulers/mullapudi2016/histogram.cpp
index 3ef2ded28f0a..bcee5695c12c 100644
--- a/test/autoschedulers/mullapudi2016/histogram.cpp
+++ b/test/autoschedulers/mullapudi2016/histogram.cpp
@@ -120,11 +120,17 @@ double run_test(bool auto_schedule) {
 }
 
 int main(int argc, char **argv) {
-    if (get_jit_target_from_environment().arch == Target::WebAssembly) {
+    Halide::Target target = get_jit_target_from_environment();
+    if (target.arch == Target::WebAssembly) {
         printf("[SKIP] Autoschedulers do not support WebAssembly.\n");
         return 0;
     }
 
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
+
     if (argc != 2) {
         fprintf(stderr, "Usage: %s <autoscheduler-lib>\n", argv[0]);
         return 1;
diff --git a/test/correctness/bool_predicate_cast.cpp b/test/correctness/bool_predicate_cast.cpp
index 1043f329b76c..ff958eb52a3b 100644
--- a/test/correctness/bool_predicate_cast.cpp
+++ b/test/correctness/bool_predicate_cast.cpp
@@ -8,6 +8,11 @@ int main(int argc, char **argv) {
     // Test explicit casting of a predicate to an integer as part of a reduction
     // NOTE: triggers a convert_to_bool in Vulkan for a SelectOp
     Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
+
     Var x("x"), y("y");
 
     Func input("input");
diff --git a/test/correctness/boundary_conditions.cpp b/test/correctness/boundary_conditions.cpp
index 61422d130d01..02ef9ded6f2b 100644
--- a/test/correctness/boundary_conditions.cpp
+++ b/test/correctness/boundary_conditions.cpp
@@ -392,6 +392,10 @@ int main(int argc, char **argv) {
         // The wasm jit is very slow, so shorten this test here.
         vector_width_max = 8;
     }
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
 
     std::vector<Task> tasks;
     for (int vector_width = 1; vector_width <= vector_width_max; vector_width *= 2) {
diff --git a/test/correctness/convolution.cpp b/test/correctness/convolution.cpp
index 60689c33ec87..72ad5af5ee33 100644
--- a/test/correctness/convolution.cpp
+++ b/test/correctness/convolution.cpp
@@ -4,6 +4,13 @@
 using namespace Halide;
 
 int main(int argc, char **argv) {
+    Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan)) {
+        if (!target.has_feature(Target::VulkanInt16)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
+            return 0;
+        }
+    }
 
     // int W = 64*3, H = 64*3;
     const int W = 128, H = 48;
@@ -64,8 +71,6 @@ int main(int argc, char **argv) {
     Func blur2("blur2");
     blur2(x, y) = sum(tent(r.x, r.y) * input(x + r.x - 1, y + r.y - 1));
 
-    Target target = get_jit_target_from_environment();
-
     if (target.has_gpu_feature()) {
         Var xi("xi"), yi("yi");
 
diff --git a/test/correctness/convolution_multiple_kernels.cpp b/test/correctness/convolution_multiple_kernels.cpp
index 8a27787d36e9..831c6980ab05 100644
--- a/test/correctness/convolution_multiple_kernels.cpp
+++ b/test/correctness/convolution_multiple_kernels.cpp
@@ -39,6 +39,11 @@ int main(int argc, char **argv) {
                  sum(cast<uint16_t>(box2(r.x, r.y)) * input(x + r.x, y + r.y));
 
     Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
+        return 0;
+    }
+
     if (target.has_gpu_feature()) {
         Var xi("xi"), yi("yi");
         blur.gpu_tile(x, y, xi, yi, 16, 16);
diff --git a/test/correctness/dilate3x3.cpp b/test/correctness/dilate3x3.cpp
index b41c8040192a..e747c82a285b 100644
--- a/test/correctness/dilate3x3.cpp
+++ b/test/correctness/dilate3x3.cpp
@@ -27,6 +27,10 @@ int main(int argc, char **argv) {
 
     // Schedule.
     Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
     if (target.has_gpu_feature()) {
         Var xi("xi"), yi("yi");
         dilate3x3.gpu_tile(x, y, xi, yi, 16, 16);
diff --git a/test/correctness/gpu_arg_types.cpp b/test/correctness/gpu_arg_types.cpp
index fe06f6849f45..214a2fdd5a39 100644
--- a/test/correctness/gpu_arg_types.cpp
+++ b/test/correctness/gpu_arg_types.cpp
@@ -3,10 +3,15 @@
 using namespace Halide;
 int main(int argc, char *argv[]) {
 
-    if (!get_jit_target_from_environment().has_gpu_feature()) {
+    Halide::Target target = get_jit_target_from_environment();
+    if (!target.has_gpu_feature()) {
         printf("[SKIP] No GPU target enabled.\n");
         return 0;
     }
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
+        return 0;
+    }
 
     Func f, g;
     Var x, y, tx, ty;
diff --git a/test/correctness/gpu_dynamic_shared.cpp b/test/correctness/gpu_dynamic_shared.cpp
index f98636ea8905..3ea7f2b085e7 100644
--- a/test/correctness/gpu_dynamic_shared.cpp
+++ b/test/correctness/gpu_dynamic_shared.cpp
@@ -11,14 +11,20 @@ int main(int argc, char **argv) {
     }
 
     if (t.has_feature(Target::Vulkan)) {
+        if (!t.has_feature(Target::VulkanV13)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing 1.3 feature in target!\n");
+            return 0;
+        }
+
         const auto *interface = get_device_interface_for_device_api(DeviceAPI::Vulkan);
         assert(interface->compute_capability != nullptr);
         int major, minor;
         int err = interface->compute_capability(nullptr, &major, &minor);
         if (err != 0 || (major == 1 && minor < 3)) {
-            printf("[SKIP] Vulkan %d.%d is less than required 1.3.\n", major, minor);
+            printf("[SKIP] Vulkan runtime support %d.%d is less than required 1.3.\n", major, minor);
             return 0;
         }
+
         if ((t.os == Target::IOS) || (t.os == Target::OSX)) {
             printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK doesn't support dynamic LocalSizeId yet)!\n");
             return 0;
diff --git a/test/correctness/gpu_reuse_shared_memory.cpp b/test/correctness/gpu_reuse_shared_memory.cpp
index 37e932d78273..1a8977f75a76 100644
--- a/test/correctness/gpu_reuse_shared_memory.cpp
+++ b/test/correctness/gpu_reuse_shared_memory.cpp
@@ -172,6 +172,17 @@ int main(int argc, char **argv) {
         return 0;
     }
 
+    if (t.has_feature(Target::Vulkan)) {
+        if (!t.has_feature(Target::VulkanInt8)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+            return 0;
+        }
+        if (!t.has_feature(Target::VulkanInt16)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
+            return 0;
+        }
+    }
+
     for (auto memory_type : {MemoryType::GPUShared, MemoryType::Heap}) {
         printf("Running multi thread type test\n");
         if (multi_thread_type_test(memory_type) != 0) {
diff --git a/test/correctness/gpu_transpose.cpp b/test/correctness/gpu_transpose.cpp
index 781deb004fef..503b099862f1 100644
--- a/test/correctness/gpu_transpose.cpp
+++ b/test/correctness/gpu_transpose.cpp
@@ -4,11 +4,17 @@
 using namespace Halide;
 
 int main(int argc, char **argv) {
-    if (!get_jit_target_from_environment().has_gpu_feature()) {
+    Target t = get_jit_target_from_environment();
+    if (!t.has_gpu_feature()) {
         printf("[SKIP] No GPU target enabled.\n");
         return 0;
     }
 
+    if (t.has_feature(Target::Vulkan) && (!t.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
+
     ImageParam in(UInt(8), 2);
 
     Var x, y;
diff --git a/test/correctness/interleave_rgb.cpp b/test/correctness/interleave_rgb.cpp
index 3a679a239035..d7d045ed4125 100644
--- a/test/correctness/interleave_rgb.cpp
+++ b/test/correctness/interleave_rgb.cpp
@@ -103,14 +103,15 @@ bool test_deinterleave(int x_stride) {
 }
 
 int main(int argc, char **argv) {
+    Target target = get_jit_target_from_environment();
     for (int x_stride : {3, 4}) {
-        if (!test_interleave<uint8_t>(x_stride)) return 1;
-        if (!test_interleave<uint16_t>(x_stride)) return 1;
-        if (!test_interleave<uint32_t>(x_stride)) return 1;
+        if (target.supports_type(halide_type_of<uint8_t>()) && !test_interleave<uint8_t>(x_stride)) return 1;
+        if (target.supports_type(halide_type_of<uint16_t>()) && !test_interleave<uint16_t>(x_stride)) return 1;
+        if (target.supports_type(halide_type_of<uint32_t>()) && !test_interleave<uint32_t>(x_stride)) return 1;
 
-        if (!test_deinterleave<uint8_t>(x_stride)) return 1;
-        if (!test_deinterleave<uint16_t>(x_stride)) return 1;
-        if (!test_deinterleave<uint32_t>(x_stride)) return 1;
+        if (target.supports_type(halide_type_of<uint8_t>()) && !test_deinterleave<uint8_t>(x_stride)) return 1;
+        if (target.supports_type(halide_type_of<uint16_t>()) && !test_deinterleave<uint16_t>(x_stride)) return 1;
+        if (target.supports_type(halide_type_of<uint32_t>()) && !test_deinterleave<uint32_t>(x_stride)) return 1;
     }
     printf("Success!\n");
     return 0;
diff --git a/test/correctness/interleave_x.cpp b/test/correctness/interleave_x.cpp
index 1120390cac1e..4e5361123727 100644
--- a/test/correctness/interleave_x.cpp
+++ b/test/correctness/interleave_x.cpp
@@ -11,6 +11,11 @@ int main(int argc, char **argv) {
     interleaved(x, y) = select(x % 2 == 0, cast<uint16_t>(3), cast<uint16_t>(7));
 
     Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt16))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing support for Int16!\n");
+        return 0;
+    }
+
     if (target.has_gpu_feature()) {
         Var tx("tx"), ty("ty");
         interleaved.gpu_tile(x, y, tx, ty, 16, 16);
diff --git a/test/correctness/logical.cpp b/test/correctness/logical.cpp
index 1bd134bc37f4..9f0d18289211 100644
--- a/test/correctness/logical.cpp
+++ b/test/correctness/logical.cpp
@@ -13,6 +13,12 @@ Expr u16(Expr a) {
 
 int main(int argc, char **argv) {
 
+    Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
+
     Buffer<uint8_t> input(128, 64);
 
     for (int y = 0; y < input.height(); y++) {
@@ -28,7 +34,6 @@ int main(int argc, char **argv) {
                              ((input(x, y) > 40) && (!(input(x, y) > 50))),
                          u8(255), u8(0));
 
-        Target target = get_jit_target_from_environment();
         if (target.has_gpu_feature()) {
             f.gpu_tile(x, y, xi, yi, 16, 16);
             f.vectorize(xi, 4);
@@ -62,7 +67,6 @@ int main(int argc, char **argv) {
                              ((input(x, y) > 40) && (!common_cond)),
                          u8(255), u8(0));
 
-        Target target = get_jit_target_from_environment();
         if (target.has_gpu_feature()) {
             f.gpu_tile(x, y, xi, yi, 16, 16);
             f.vectorize(xi, 4);
@@ -93,8 +97,6 @@ int main(int argc, char **argv) {
         Func f("f");
         f(x, y) = select(x < 10 || x > 20 || y < 10 || y > 20, 0, input(x, y));
 
-        Target target = get_jit_target_from_environment();
-
         if (target.has_gpu_feature()) {
             f.gpu_tile(x, y, xi, yi, 16, 16);
             f.vectorize(xi, 4);
@@ -124,7 +126,6 @@ int main(int argc, char **argv) {
         Expr ten = 10;
         f(x, y) = select(input(x, y) > ten, u8(255), u8(0));
 
-        Target target = get_jit_target_from_environment();
         if (target.has_gpu_feature()) {
             f.gpu_tile(x, y, xi, yi, 16, 16);
             f.vectorize(xi, 4);
@@ -177,7 +178,6 @@ int main(int argc, char **argv) {
             cpu.compute_root();
             gpu.compute_root();
 
-            Target target = get_jit_target_from_environment();
             if (target.has_feature(Target::OpenCL) && n == 16 && w == 32) {
                 // Workaround for https://github.com/halide/Halide/issues/2477
                 printf("Skipping uint%d -> uint%d for OpenCL\n", n, w);
diff --git a/test/correctness/median3x3.cpp b/test/correctness/median3x3.cpp
index 9129ea0b6418..7175cd657962 100644
--- a/test/correctness/median3x3.cpp
+++ b/test/correctness/median3x3.cpp
@@ -13,6 +13,13 @@ Expr mid3(Expr a, Expr b, Expr c) {
 }
 
 int main(int arch, char **argv) {
+
+    Target target = get_jit_target_from_environment();
+    if (target.has_feature(Target::Vulkan) && (!target.has_feature(Target::VulkanInt8))) {
+        printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+        return 0;
+    }
+
     const int W = 256, H = 256;
     Buffer<uint8_t> in(W, H);
     // Set up the input.
@@ -43,7 +50,6 @@ int main(int arch, char **argv) {
     median3x3(x, y) = mid3(min_max(x, y), max_min(x, y), mid_mid(x, y));
 
     // Schedule.
-    Target target = get_jit_target_from_environment();
     if (target.has_gpu_feature()) {
         Var xi("xi"), yi("yi");
         median3x3.gpu_tile(x, y, xi, yi, 16, 16);
diff --git a/test/correctness/mul_div_mod.cpp b/test/correctness/mul_div_mod.cpp
index 8eca8141bba2..7368ef462e0b 100644
--- a/test/correctness/mul_div_mod.cpp
+++ b/test/correctness/mul_div_mod.cpp
@@ -540,6 +540,21 @@ void add_test_div_mod(int vector_width, ScheduleVariant scheduling, Target targe
 int main(int argc, char **argv) {
     Target target = get_jit_target_from_environment();
 
+    if (target.has_feature(Target::Vulkan)) {
+        if (!target.has_feature(Target::VulkanInt8)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+            return 0;
+        }
+        if (!target.has_feature(Target::VulkanInt16)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
+            return 0;
+        }
+        if (!target.has_feature(Target::VulkanInt64)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int64 support!\n");
+            return 0;
+        }
+    }
+
     ScheduleVariant scheduling = CPU;
     if (target.has_gpu_feature()) {
         scheduling = TiledGPU;
diff --git a/test/correctness/multiple_outputs.cpp b/test/correctness/multiple_outputs.cpp
index d630cfdf082a..d42204bbd250 100644
--- a/test/correctness/multiple_outputs.cpp
+++ b/test/correctness/multiple_outputs.cpp
@@ -4,7 +4,8 @@
 using namespace Halide;
 
 int main(int argc, char **argv) {
-    const bool use_gpu = get_jit_target_from_environment().has_gpu_feature();
+    Target target = get_jit_target_from_environment();
+    const bool use_gpu = target.has_gpu_feature();
 
     // An internal Func that produces multiple values.
     {
@@ -93,7 +94,7 @@ int main(int argc, char **argv) {
     }
 
     // Now multiple output Funcs via inferred Realization
-    {
+    if (target.supports_type(halide_type_of<uint8_t>()) && target.supports_type(halide_type_of<int16_t>())) {
         Func f, g;
         Var x, xi;
         f(x) = cast<float>(100 * x);
diff --git a/test/correctness/widening_reduction.cpp b/test/correctness/widening_reduction.cpp
index 43b79486e03a..ad720afaa7bd 100644
--- a/test/correctness/widening_reduction.cpp
+++ b/test/correctness/widening_reduction.cpp
@@ -9,11 +9,20 @@ using namespace Halide::Internal;
 int main(int arch, char **argv) {
 
     Halide::Target target = get_jit_target_from_environment();
-    if (target.has_feature(Target::Vulkan) && ((target.os == Target::IOS) || target.os == Target::OSX)) {
-        printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n");
-        return 0;
+    if (target.has_feature(Target::Vulkan)) {
+        if (!target.has_feature(Target::VulkanInt8)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int8 support!\n");
+            return 0;
+        }
+        if (!target.has_feature(Target::VulkanInt16)) {
+            printf("[SKIP] Skipping test for Vulkan ... missing Int16 support!\n");
+            return 0;
+        }
+        if ((target.os == Target::IOS) || (target.os == Target::OSX)) {
+            printf("[SKIP] Skipping test for Vulkan on iOS/OSX (MoltenVK fails to convert max/min intrinsics correctly)!\n");
+            return 0;
+        }
     }
-
     const int W = 256, H = 256;
 
     Buffer<uint8_t> in(W, H);