diff --git a/src/gpu/intel/conv/jit/normalization.cpp b/src/gpu/intel/conv/jit/normalization.cpp
index c5ece2a7f53..f204e951f77 100644
--- a/src/gpu/intel/conv/jit/normalization.cpp
+++ b/src/gpu/intel/conv/jit/normalization.cpp
@@ -156,7 +156,7 @@ uint32_t post_op_view_mapper_t::normalize_mask(uint32_t orig_mask) const {
             if (b > 0) new_tile.set(b + 1, cvt_dims[b]);
         }
         new_tile[1] = cvt_dims[1];
-        cvt_dims = new_tile;
+        cvt_dims = std::move(new_tile);
     }
 
     uint32_t mask = 0;
diff --git a/src/gpu/intel/conv/jit/plan.cpp b/src/gpu/intel/conv/jit/plan.cpp
index 16ad317aba0..95e50006a0b 100644
--- a/src/gpu/intel/conv/jit/plan.cpp
+++ b/src/gpu/intel/conv/jit/plan.cpp
@@ -2336,7 +2336,7 @@ class plan_builder_t {
 
         if (plan_.hw < ngen::HW::XeHPG) {
             // Verifies that SLM loads after k-slicing are at GRF granularity.
-            auto l_sub = l.sub(tile_t(rem_dims));
+            auto l_sub = l.sub(tile_t(std::move(rem_dims)));
             int bytes = l_sub.type().size();
             stride_t stride = 1;
             for (auto &b : l_sub.blocks()) {
diff --git a/src/gpu/intel/conv/jit/v2/kernel_desc.cpp b/src/gpu/intel/conv/jit/v2/kernel_desc.cpp
index d3b0bb268af..d285565d75b 100644
--- a/src/gpu/intel/conv/jit/v2/kernel_desc.cpp
+++ b/src/gpu/intel/conv/jit/v2/kernel_desc.cpp
@@ -840,7 +840,7 @@ static bool try_parse_internal_arg(std::string s, std::string &base_name,
         denom = std::stoi(s.substr(pos));
         s = s.substr(0, divup_pos);
     }
-    base_name = s;
+    base_name = std::move(s);
     return true;
 }
 
diff --git a/src/gpu/intel/conv/jit/zp_plan.cpp b/src/gpu/intel/conv/jit/zp_plan.cpp
index 4841ff8d9e2..dbe8e634ca4 100644
--- a/src/gpu/intel/conv/jit/zp_plan.cpp
+++ b/src/gpu/intel/conv/jit/zp_plan.cpp
@@ -122,8 +122,8 @@ class split_dispatcher_t {
         for (int factor : {2, 4}) {
             auto &splits = (abc == abc_kind_t::a) ? a_splits_ : b_splits_;
             if ((int)splits.size() <= factor) splits.resize(factor + 1);
-            splits[factor] = split_t(
-                    c_layout, mapper, abc, factor, simd_dim_idx_, simd_);
+            splits[factor] = split_t(c_layout, mapper, abc, factor,
+                    static_cast<dim_idx_t>(simd_dim_idx_), simd_);
         }
     }
     for (int b : {32, 16, 8}) {
diff --git a/src/gpu/intel/gemm/jit.cpp b/src/gpu/intel/gemm/jit.cpp
index 4f37dccc311..26dd982564e 100644
--- a/src/gpu/intel/gemm/jit.cpp
+++ b/src/gpu/intel/gemm/jit.cpp
@@ -446,6 +446,7 @@ status_t gen_t::execute(const exec_ctx_t &ctx) const {
         // Limited support of host scalar dst scales
         if (c_scales.is_host_scalar() && pd()->attr()->post_ops_.len() == 0) {
             CHECK(maybe_get_scale_as_float(c_scales_storage, scale_val));
+            gpu_assert(scale_val != 0);
             alpha /= scale_val;
         }
     }
diff --git a/src/gpu/intel/gemm/jit/gen_kernel.cpp b/src/gpu/intel/gemm/jit/gen_kernel.cpp
index ac2ee380254..df294920a84 100644
--- a/src/gpu/intel/gemm/jit/gen_kernel.cpp
+++ b/src/gpu/intel/gemm/jit/gen_kernel.cpp
@@ -1131,7 +1131,7 @@ dsl::kernel_t get_dsl_kernel(const GEMMProblem &problem,
         if (k != -1)
             cset.add_constraint(gemm_desc.kernel_iface().find_arg("k") == k);
     }
-    return make_kernel(gemm_desc, cset);
+    return make_kernel(gemm_desc, std::move(cset));
 };
 
 status_t gen_kernel_t::get_kernel(
diff --git a/src/gpu/intel/gemm/jit/generator_dsl/builder.cpp b/src/gpu/intel/gemm/jit/generator_dsl/builder.cpp
index 9815b0f8d82..44156790820 100644
--- a/src/gpu/intel/gemm/jit/generator_dsl/builder.cpp
+++ b/src/gpu/intel/gemm/jit/generator_dsl/builder.cpp
@@ -561,7 +561,8 @@ struct generator_dsl_t {
                 << strategy.prefetchB << " -> " << prefetchB;
 
         k_loop_config_t k_loop_main {k_blk, prefetchA, prefetchB, kloop_it,
-                A_load, B_load, A_prefetch_transform, B_prefetch_transform, C};
+                std::move(A_load), std::move(B_load), A_prefetch_transform,
+                B_prefetch_transform, C};
 
         gpu_assert(k_loop_main.A_load_warmup() % kloop_it.A_load().tile[k_var]
                 == 0);
@@ -573,8 +574,8 @@ struct generator_dsl_t {
 
         k_loop_config_t k_loop_short {
                 (int)lcm(A_load_short.tile[k_var], B_load_short.tile[k_var]), 0,
-                0, kloop_it, A_load_short, B_load_short, A_prefetch_transform,
-                B_prefetch_transform, C};
+                0, kloop_it, std::move(A_load_short), std::move(B_load_short),
+                A_prefetch_transform, B_prefetch_transform, std::move(C)};
         gpu_assert(k_loop_short.k_warmup() == 0);
 
         if (problem.A.alignment) {
diff --git a/src/gpu/intel/jit/codegen/codegen.cpp b/src/gpu/intel/jit/codegen/codegen.cpp
index 19a2a4e3391..cda66394150 100644
--- a/src/gpu/intel/jit/codegen/codegen.cpp
+++ b/src/gpu/intel/jit/codegen/codegen.cpp
@@ -1806,7 +1806,7 @@ cl_kernel make_kernel(const kernel::iface_t &iface, const stmt_t &body,
 
 #define GPU_HW_CASE(hw) \
     ir_to_ngen_generator_t<ngen::OpenCLCodeGenerator<hw>> g( \
             iface, options, debug_cfg); \
-    g.setInterface(interface); \
+    g.setInterface(std::move(interface)); \
     convert_ir_to_ngen(body, g); \
     return g.getKernel(ctx, dev);
diff --git a/src/gpu/intel/jit/codegen/reorder.hpp b/src/gpu/intel/jit/codegen/reorder.hpp
index 717d76b07b9..6d95d7fabc3 100644
--- a/src/gpu/intel/jit/codegen/reorder.hpp
+++ b/src/gpu/intel/jit/codegen/reorder.hpp
@@ -389,7 +389,7 @@ class reorder_impl_t {
         auto dt = to_ngen(layout.type());
         auto buffer = init(into<int>(elems), dt);
         buffer.stride = (uint8_t)1;
-        return {std::move(layout), buffer};
+        return {std::move(layout), std::move(buffer)};
     }
 
     layout_t make_retyped_layout(
diff --git a/src/gpu/intel/jit/dsl/runtime.hpp b/src/gpu/intel/jit/dsl/runtime.hpp
index 2d02d6d84fe..6abd4a95c48 100644
--- a/src/gpu/intel/jit/dsl/runtime.hpp
+++ b/src/gpu/intel/jit/dsl/runtime.hpp
@@ -30,7 +30,7 @@ namespace dsl {
 inline ::sycl::kernel make_kernel(
         const kernel_t &kernel, ::sycl::context ctx, ::sycl::device dev) {
     return make_kernel(kernel.iface, kernel.body, kernel.options,
-            kernel.debug_cfg, ctx, dev);
+            kernel.debug_cfg, std::move(ctx), std::move(dev));
 }
 #endif
 #ifdef WITH_OPENCL_RUNTIME
diff --git a/src/gpu/intel/jit/ir/message.cpp b/src/gpu/intel/jit/ir/message.cpp
index e2da0bff795..63d1c7f0ace 100644
--- a/src/gpu/intel/jit/ir/message.cpp
+++ b/src/gpu/intel/jit/ir/message.cpp
@@ -711,7 +711,7 @@ bool access_builder_t::try_build_2d(send_params_t &send_params) {
     auto &send = _send.as<send_t>();
 
     stmt_ = stmt_t();
-    auto vstart0 = mem_view_.vstart();
+    const auto &vstart0 = mem_view_.vstart();
     for (auto &start : vlayout.iter(tile)) {
         int access_size = send.access_size();
         int access_elems = access_size / mem_type_.size();
diff --git a/src/gpu/intel/jit/ir/send_plan.cpp b/src/gpu/intel/jit/ir/send_plan.cpp
index 569b38dcdc2..8be4d7be5a5 100644
--- a/src/gpu/intel/jit/ir/send_plan.cpp
+++ b/src/gpu/intel/jit/ir/send_plan.cpp
@@ -23,7 +23,6 @@
 #include "common/utils.hpp"
 
 #include "gpu/intel/jit/ir/block_2d_utils.hpp"
-#include "gpu/intel/jit/ir/hw.hpp"
 #include "gpu/intel/jit/ir/message.hpp"
 #include "gpu/intel/jit/ir/reorder.hpp"
 #include "gpu/intel/jit/pass/simplify.hpp"
@@ -1364,7 +1363,6 @@ class view_info_t {
 
 private:
     dim_t get_block_alignment_bytes(size_t inner_idx) const {
-        if (inner_idx < 0) return 1;
         // Get base address.
         const auto &tlayout = view().tlayout();
         const auto &type = vlayout().type();
diff --git a/src/gpu/intel/jit/ir/v2/reqs.cpp b/src/gpu/intel/jit/ir/v2/reqs.cpp
index 14297170915..d01859d11a7 100644
--- a/src/gpu/intel/jit/ir/v2/reqs.cpp
+++ b/src/gpu/intel/jit/ir/v2/reqs.cpp
@@ -559,7 +559,7 @@ class req_impl_t {
         auto s = jit::parse(in);
         for (req_kind_t op : {req_kind_t::_or_eq, req_kind_t::eq,
                      req_kind_t::ge, req_kind_t::le}) {
-            auto s_op = to_string(op);
+            const auto &s_op = to_string(op);
             auto pos = s.find(s_op);
             if (pos == std::string::npos) continue;
             auto s_lhs = s.substr(0, pos);
diff --git a/src/gpu/intel/lrn/ref.hpp b/src/gpu/intel/lrn/ref.hpp
index 04bb248680f..3dd7ac712c7 100644
--- a/src/gpu/intel/lrn/ref.hpp
+++ b/src/gpu/intel/lrn/ref.hpp
@@ -83,7 +83,6 @@ struct ref_fwd_t : public primitive_t {
 
         compute::kernel_ctx_t kernel_ctx;
 
-        status_t status = status::success;
         const auto *desc = pd()->desc();
 
         kernel_ctx.set_data_type(desc->src_desc.data_type, false);
@@ -102,7 +101,6 @@ struct ref_fwd_t : public primitive_t {
                 break;
             default: VDISPATCH_LRN_IC(false, VERBOSE_BAD_ALGORITHM);
         }
-        if (status != status::success) return status;
 
         const memory_desc_wrapper src_d(pd()->src_md());
         const memory_desc_wrapper dst_d(pd()->dst_md());
@@ -207,7 +205,6 @@ struct ref_bwd_t : public primitive_t {
 
         compute::kernel_ctx_t kernel_ctx;
 
-        status_t status = status::success;
         const auto *desc = pd()->desc();
 
        kernel_ctx.set_data_type(desc->src_desc.data_type, false);
@@ -223,7 +220,6 @@ struct ref_bwd_t : public primitive_t {
                 break;
             default: VDISPATCH_LRN_IC(false, VERBOSE_BAD_ALGORITHM);
         }
-        if (status != status::success) return status;
 
         const memory_desc_wrapper src_d(pd()->src_md());
         const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md());
diff --git a/src/gpu/intel/matmul/gemm.hpp b/src/gpu/intel/matmul/gemm.hpp
index 38821ffb87a..44a4aa6d6f8 100644
--- a/src/gpu/intel/matmul/gemm.hpp
+++ b/src/gpu/intel/matmul/gemm.hpp
@@ -312,9 +312,9 @@ struct gemm_t : public primitive_t {
             c_md = &c_md_reshaped;
             if (with_bia) bias_md = &bia_md_reshaped;
 
-            gemm_attr.scales_ = reshaped_scales;
-            gemm_attr.zero_points_ = reshaped_zp;
-            gemm_attr.precomputed_reductions_ = reshaped_pr;
+            gemm_attr.scales_ = std::move(reshaped_scales);
+            gemm_attr.zero_points_ = std::move(reshaped_zp);
+            gemm_attr.precomputed_reductions_ = std::move(reshaped_pr);
             gemm_attr.post_ops_ = reshaped_post_ops;
             return status::success;
         };
diff --git a/src/gpu/intel/rnn/grid.cpp b/src/gpu/intel/rnn/grid.cpp
index bc35280cb0b..f86cd9d5e56 100644
--- a/src/gpu/intel/rnn/grid.cpp
+++ b/src/gpu/intel/rnn/grid.cpp
@@ -1604,8 +1604,8 @@ status_t simple_common_t::execute_(const exec_ctx_t &ctx) const {
     bool is_lr = !one_of(conf.exec_dir, r2l, r2l);
     bool is_rl = !one_of(conf.exec_dir, l2r, l2r);
 
-    const memory_storage_t *scales_buf = nullptr;
-    if (pd()->conf.is_int8 && pd()->conf.copy_bias) {
+    const memory_storage_t *scales_buf = &memory_storage_t::empty_storage();
+    if (conf.is_int8 && conf.copy_bias) {
         scales_buf = &CTX_GPU_RES_STORAGE(SCALES_);
     }
 
diff --git a/src/gpu/intel/sdpa/micro.cpp b/src/gpu/intel/sdpa/micro.cpp
index dc88fe34c34..8ae95820ba3 100644
--- a/src/gpu/intel/sdpa/micro.cpp
+++ b/src/gpu/intel/sdpa/micro.cpp
@@ -761,8 +761,10 @@ status_t micro_t::execute(const exec_ctx_t &ctx) const {
     if (pd()->with_host_scale()) {
         auto scalar_storage = utils::downcast<
                 const dnnl::impl::host_scalar_memory_storage_t *>(&scale);
-        scalar_storage->get_scalar_value(
+        auto status = scalar_storage->get_scalar_value(
                 &scalar_scale, scale_mdw.data_type_size());
+        assert(status == status::success);
+        if (status != status::success) return status;
         scalar_scale = dnnl::impl::cpu::io::load_float_value(
                 pd()->scale_md()->data_type, &scalar_scale, 0);
         inv_scalar_scale = 1. / scalar_scale;
diff --git a/tests/gtests/internals/test_sdpa.cpp b/tests/gtests/internals/test_sdpa.cpp
index cf56adafc9a..b9000a7265c 100644
--- a/tests/gtests/internals/test_sdpa.cpp
+++ b/tests/gtests/internals/test_sdpa.cpp
@@ -361,7 +361,7 @@ std::string print_row(const sdpa_dims_t &p) {
     ss << "|" << p.seq_len.kv;
     ss << "|" << p.seq_len.q;
     ss << "|" << p.key.dt;
-    if (!(p.key.dt == mdt::f16 || p.value.dt == mdt::bf16)
+    if (!(p.key.dt == mdt::f16 || p.key.dt == mdt::bf16)
             && p.qtype != quantize_type::no_quantization) {
         ss << "/" << p.key.sdt;
         ss << "/" << p.key.zpdt;
@@ -562,7 +562,7 @@ sdpa_tensors_t get_descriptors(dnnl::engine &eng, dnnl::stream &strm,
 
     memory::dims mask_sz;
     switch (p.mask.type) {
-        case mask_type::no_mask: mask_sz = {};
+        case mask_type::no_mask: mask_sz = {}; break;
         case mask_type::oneD: mask_sz = {1, 1, 1, p.seq_len.kv}; break;
         case mask_type::causal_br:
         case mask_type::causal_tl:
diff --git a/third_party/ngen/ngen_auto_swsb.hpp b/third_party/ngen/ngen_auto_swsb.hpp
index 499ea1d0252..92163b0f6fc 100644
--- a/third_party/ngen/ngen_auto_swsb.hpp
+++ b/third_party/ngen/ngen_auto_swsb.hpp
@@ -1794,7 +1794,7 @@ PVCWARWA analyzePVCWARWA(HW hw, Program &program, BasicBlock &bb, int phase,
     }
 
     // Case 2: walk forward, looking for a new target send instruction.
-    auto eligibleSend = [=, &program](uint32_t inum) {
+    auto eligibleSend = [=, &program, &dep](uint32_t inum) {
         auto &insn = program[inum];
         if (inum != dep.inum && insn.predicated()) return false;
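---

Note (not part of the patch): most hunks above apply two recurring idioms. First, a local that is dead after the statement is moved, not copied, into its final destination (new_tile, rem_dims, cset, A_load/B_load, the reshaped attribute members). Second, binding const auto & instead of auto (vstart0, s_op) avoids a copy when the callee returns a reference; if the callee returns by value, C++ lifetime extension keeps the temporary alive for the scope of the reference, so the change is safe either way. Below is a minimal standalone sketch of the move idiom; the tracer type is hypothetical and exists only to make copies and moves observable.

#include <iostream>
#include <utility>
#include <vector>

// Hypothetical heap-owning type that logs copies and moves. Classes such
// as tile_t or constraint_set_t in the patch behave analogously: a copy
// duplicates the owned storage, a move just steals the pointer.
struct tracer {
    std::vector<int> data;
    explicit tracer(std::vector<int> d) : data(std::move(d)) {}
    tracer(const tracer &o) : data(o.data) { std::cout << "copy ctor\n"; }
    tracer(tracer &&o) noexcept : data(std::move(o.data)) {
        std::cout << "move ctor\n";
    }
    tracer &operator=(const tracer &o) {
        data = o.data;
        std::cout << "copy assign\n";
        return *this;
    }
    tracer &operator=(tracer &&o) noexcept {
        data = std::move(o.data);
        std::cout << "move assign\n";
        return *this;
    }
};

int main() {
    tracer dst(std::vector<int> {1, 2, 3});

    tracer a(std::vector<int> {4, 5, 6});
    dst = a; // prints "copy assign": a stays usable, but its buffer is cloned

    tracer b(std::vector<int> {7, 8, 9});
    dst = std::move(b); // prints "move assign": b's buffer is stolen
    // b must not be read past this point, mirroring how the patch only
    // moves locals (e.g. rem_dims, cset) at their last point of use.
    return 0;
}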