2 changes: 1 addition & 1 deletion src/gpu/intel/conv/jit/normalization.cpp
@@ -156,7 +156,7 @@ uint32_t post_op_view_mapper_t::normalize_mask(uint32_t orig_mask) const {
 if (b > 0) new_tile.set(b + 1, cvt_dims[b]);
 }
 new_tile[1] = cvt_dims[1];
-cvt_dims = new_tile;
+cvt_dims = std::move(new_tile);
 }

 uint32_t mask = 0;
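The change above, and most of the hunks below, follow the same pattern: a local object that is not read again is handed off with std::move so the receiver can take over its storage instead of copying it. A minimal standalone sketch of the difference, using a hypothetical tile struct rather than the oneDNN types:

#include <utility>
#include <vector>

// Hypothetical stand-in for a tile/dims container, for illustration only.
struct tile {
    std::vector<long> dims;
};

int main() {
    tile cvt_dims {{1, 2, 3, 4}};
    tile new_tile {{1, 1, 2, 3, 4}};

    // Copy assignment: allocates and copies; new_tile keeps its buffer.
    cvt_dims = new_tile;

    // Move assignment: cvt_dims takes over new_tile's buffer, with no
    // allocation or element copy. Safe here because new_tile is not read again.
    cvt_dims = std::move(new_tile);

    return 0;
}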
2 changes: 1 addition & 1 deletion src/gpu/intel/conv/jit/plan.cpp
@@ -2336,7 +2336,7 @@ class plan_builder_t {

 if (plan_.hw < ngen::HW::XeHPG) {
 // Verifies that SLM loads after k-slicing are at GRF granularity.
-auto l_sub = l.sub(tile_t(rem_dims));
+auto l_sub = l.sub(tile_t(std::move(rem_dims)));
 int bytes = l_sub.type().size();
 stride_t stride = 1;
 for (auto &b : l_sub.blocks()) {
2 changes: 1 addition & 1 deletion src/gpu/intel/conv/jit/v2/kernel_desc.cpp
@@ -840,7 +840,7 @@ static bool try_parse_internal_arg(std::string s, std::string &base_name,
 denom = std::stoi(s.substr(pos));
 s = s.substr(0, divup_pos);
 }
-base_name = s;
+base_name = std::move(s);
 return true;
 }

4 changes: 2 additions & 2 deletions src/gpu/intel/conv/jit/zp_plan.cpp
@@ -122,8 +122,8 @@ class split_dispatcher_t {
 for (int factor : {2, 4}) {
 auto &splits = (abc == abc_kind_t::a) ? a_splits_ : b_splits_;
 if ((int)splits.size() <= factor) splits.resize(factor + 1);
-splits[factor] = split_t(
-c_layout, mapper, abc, factor, simd_dim_idx_, simd_);
+splits[factor] = split_t(c_layout, mapper, abc, factor,
+static_cast<size_t>(simd_dim_idx_), simd_);
 }
 }
 for (int b : {32, 16, 8}) {
1 change: 1 addition & 0 deletions src/gpu/intel/gemm/jit.cpp
@@ -446,6 +446,7 @@ status_t gen_t::execute(const exec_ctx_t &ctx) const {
 // Limited support of host scalar dst scales
 if (c_scales.is_host_scalar() && pd()->attr()->post_ops_.len() == 0) {
 CHECK(maybe_get_scale_as_float(c_scales_storage, scale_val));
+gpu_assert(scale_val != 0);
 alpha /= scale_val;
 }
 }
2 changes: 1 addition & 1 deletion src/gpu/intel/gemm/jit/gen_kernel.cpp
@@ -1131,7 +1131,7 @@ dsl::kernel_t get_dsl_kernel(const GEMMProblem &problem,
 if (k != -1)
 cset.add_constraint(gemm_desc.kernel_iface().find_arg("k") == k);
 }
-return make_kernel(gemm_desc, cset);
+return make_kernel(gemm_desc, std::move(cset));
 };

 status_t gen_kernel_t::get_kernel(
7 changes: 4 additions & 3 deletions src/gpu/intel/gemm/jit/generator_dsl/builder.cpp
@@ -561,7 +561,8 @@ struct generator_dsl_t {
 << strategy.prefetchB << " -> " << prefetchB;

 k_loop_config_t k_loop_main {k_blk, prefetchA, prefetchB, kloop_it,
-A_load, B_load, A_prefetch_transform, B_prefetch_transform, C};
+std::move(A_load), std::move(B_load), A_prefetch_transform,
+B_prefetch_transform, C};

 gpu_assert(k_loop_main.A_load_warmup() % kloop_it.A_load().tile[k_var]
 == 0);
@@ -573,8 +574,8 @@

 k_loop_config_t k_loop_short {
 (int)lcm(A_load_short.tile[k_var], B_load_short.tile[k_var]), 0,
-0, kloop_it, A_load_short, B_load_short, A_prefetch_transform,
-B_prefetch_transform, C};
+0, kloop_it, std::move(A_load_short), std::move(B_load_short),
+A_prefetch_transform, B_prefetch_transform, std::move(C)};
 gpu_assert(k_loop_short.k_warmup() == 0);

 if (problem.A.alignment) {
2 changes: 1 addition & 1 deletion src/gpu/intel/jit/codegen/codegen.cpp
@@ -1806,7 +1806,7 @@ cl_kernel make_kernel(const kernel::iface_t &iface, const stmt_t &body,
 #define GPU_HW_CASE(hw) \
 ir_to_ngen_generator_t<ngen::OpenCLCodeGenerator<(hw)>> g( \
 iface, options, debug_cfg); \
-g.setInterface(interface); \
+g.setInterface(std::move(interface)); \
 convert_ir_to_ngen(body, g); \
 return g.getKernel(ctx, dev);

2 changes: 1 addition & 1 deletion src/gpu/intel/jit/codegen/reorder.hpp
@@ -389,7 +389,7 @@ class reorder_impl_t {
 auto dt = to_ngen(layout.type());
 auto buffer = init(into<int>(elems), dt);
 buffer.stride = (uint8_t)1;
-return {std::move(layout), buffer};
+return {std::move(layout), std::move(buffer)};
 }

 layout_t make_retyped_layout(
2 changes: 1 addition & 1 deletion src/gpu/intel/jit/dsl/runtime.hpp
@@ -30,7 +30,7 @@ namespace dsl {
 inline ::sycl::kernel make_kernel(
 const kernel_t &kernel, ::sycl::context ctx, ::sycl::device dev) {
 return make_kernel(kernel.iface, kernel.body, kernel.options,
-kernel.debug_cfg, ctx, dev);
+kernel.debug_cfg, std::move(ctx), std::move(dev));
 }
 #endif
 #ifdef WITH_OPENCL_RUNTIME
2 changes: 1 addition & 1 deletion src/gpu/intel/jit/ir/message.cpp
@@ -711,7 +711,7 @@ bool access_builder_t::try_build_2d(send_params_t &send_params) {
 auto &send = _send.as<send_t>();

 stmt_ = stmt_t();
-auto vstart0 = mem_view_.vstart();
+const auto &vstart0 = mem_view_.vstart();
 for (auto &start : vlayout.iter(tile)) {
 int access_size = send.access_size();
 int access_elems = access_size / mem_type_.size();
2 changes: 0 additions & 2 deletions src/gpu/intel/jit/ir/send_plan.cpp
@@ -23,7 +23,6 @@

 #include "common/utils.hpp"
 #include "gpu/intel/jit/ir/block_2d_utils.hpp"
-#include "gpu/intel/jit/ir/hw.hpp"
 #include "gpu/intel/jit/ir/message.hpp"
 #include "gpu/intel/jit/ir/reorder.hpp"
 #include "gpu/intel/jit/pass/simplify.hpp"
@@ -1364,7 +1363,6 @@

 private:
 dim_t get_block_alignment_bytes(size_t inner_idx) const {
-if (inner_idx < 0) return 1;
 // Get base address.
 const auto &tlayout = view().tlayout();
 const auto &type = vlayout().type();
2 changes: 1 addition & 1 deletion src/gpu/intel/jit/ir/v2/reqs.cpp
@@ -559,7 +559,7 @@ class req_impl_t {
 auto s = jit::parse<std::string>(in);
 for (req_kind_t op : {req_kind_t::_or_eq, req_kind_t::eq,
 req_kind_t::ge, req_kind_t::le}) {
-auto s_op = to_string(op);
+const auto &s_op = to_string(op);

@hidefromkgb (Contributor) commented on Oct 11, 2025:

This one might be dangerous.

to_string() creates a prvalue which is then bound to a constant reference. From C++17 onwards this is called temporary materialization and is legal, but are we sure that the use of C++17 is mandated when building oneDNN?

@hidefromkgb (Contributor) followed up:

…I stand corrected.
I found the implementation (here and here) and it does return a precomputed value instead of creating it ad hoc. Amazing!
Sorry for the noise.

 auto pos = s.find(s_op);
 if (pos == std::string::npos) continue;
 auto s_lhs = s.substr(0, pos);
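For readers following the review exchange above: the concern was about binding the result of a function call to a const reference. A minimal sketch of both cases, with hypothetical helper names rather than the oneDNN to_string():

#include <iostream>
#include <string>

// Hypothetical helper that builds a new string on each call (returns a prvalue).
std::string name_by_value(int op) {
    return "op" + std::to_string(op);
}

// Hypothetical helper that returns a reference to a precomputed string.
const std::string &name_by_ref(int op) {
    static const std::string names[] = {"==", ">=", "<="};
    return names[op % 3];
}

int main() {
    // Binding a prvalue to a const reference extends the temporary's lifetime
    // to that of s1. This has been legal since long before C++17; C++17 only
    // recast the mechanics as "temporary materialization".
    const auto &s1 = name_by_value(0);

    // If the callee already returns a reference to a long-lived object, no
    // temporary is created at all, which is reportedly what the reviewer
    // found in the actual implementation.
    const auto &s2 = name_by_ref(1);

    std::cout << s1 << " " << s2 << "\n";
    return 0;
}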
4 changes: 0 additions & 4 deletions src/gpu/intel/lrn/ref.hpp
@@ -83,7 +83,6 @@ struct ref_fwd_t : public primitive_t {

 compute::kernel_ctx_t kernel_ctx;

-status_t status = status::success;
 const auto *desc = pd()->desc();

 kernel_ctx.set_data_type(desc->src_desc.data_type, false);
@@ -102,7 +101,6 @@
 break;
 default: VDISPATCH_LRN_IC(false, VERBOSE_BAD_ALGORITHM);
 }
-if (status != status::success) return status;

 const memory_desc_wrapper src_d(pd()->src_md());
 const memory_desc_wrapper dst_d(pd()->dst_md());
@@ -207,7 +205,6 @@ struct ref_bwd_t : public primitive_t {

 compute::kernel_ctx_t kernel_ctx;

-status_t status = status::success;
 const auto *desc = pd()->desc();

 kernel_ctx.set_data_type(desc->src_desc.data_type, false);
@@ -223,7 +220,6 @@
 break;
 default: VDISPATCH_LRN_IC(false, VERBOSE_BAD_ALGORITHM);
 }
-if (status != status::success) return status;

 const memory_desc_wrapper src_d(pd()->src_md());
 const memory_desc_wrapper diff_dst_d(pd()->diff_dst_md());
6 changes: 3 additions & 3 deletions src/gpu/intel/matmul/gemm.hpp
@@ -312,9 +312,9 @@ struct gemm_t : public primitive_t {
 c_md = &c_md_reshaped;
 if (with_bia) bias_md = &bia_md_reshaped;

-gemm_attr.scales_ = reshaped_scales;
-gemm_attr.zero_points_ = reshaped_zp;
-gemm_attr.precomputed_reductions_ = reshaped_pr;
+gemm_attr.scales_ = std::move(reshaped_scales);
+gemm_attr.zero_points_ = std::move(reshaped_zp);
+gemm_attr.precomputed_reductions_ = std::move(reshaped_pr);
 gemm_attr.post_ops_ = reshaped_post_ops;
 return status::success;
 };
4 changes: 2 additions & 2 deletions src/gpu/intel/rnn/grid.cpp
@@ -1604,8 +1604,8 @@ status_t simple_common_t<aprop>::execute_(const exec_ctx_t &ctx) const {
 bool is_lr = !one_of(conf.exec_dir, r2l, r2l);
 bool is_rl = !one_of(conf.exec_dir, l2r, l2r);

-const memory_storage_t *scales_buf = nullptr;
-if (pd()->conf.is_int8 && pd()->conf.copy_bias) {
+const memory_storage_t *scales_buf = &memory_storage_t::empty_storage();
+if (conf.is_int8 && conf.copy_bias) {
 scales_buf = &CTX_GPU_RES_STORAGE(SCALES_);
 }

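The rnn/grid.cpp change swaps a null scales pointer for a pointer to a shared empty storage object, so downstream code can dereference scales_buf unconditionally. A minimal sketch of that null-object idea, using a hypothetical storage type rather than memory_storage_t:

#include <cstddef>
#include <iostream>

// Hypothetical stand-in for memory_storage_t, for illustration only.
struct storage_t {
    const float *data;
    size_t size;

    static const storage_t &empty_storage() {
        static const storage_t empty {}; // data = nullptr, size = 0
        return empty; // a valid, empty object instead of a null pointer
    }
};

void consume(const storage_t &s) {
    // No nullptr check needed; an empty storage simply has size 0.
    std::cout << "elements: " << s.size << "\n";
}

int main() {
    bool have_scales = false; // e.g. the int8 + copy_bias path
    const storage_t *scales_buf = &storage_t::empty_storage();
    if (have_scales) {
        static const float scales[16] = {};
        static const storage_t real {scales, 16};
        scales_buf = &real;
    }
    consume(*scales_buf); // safe whether or not scales were provided
    return 0;
}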
4 changes: 3 additions & 1 deletion src/gpu/intel/sdpa/micro.cpp
@@ -761,8 +761,10 @@ status_t micro_t::execute(const exec_ctx_t &ctx) const {
 if (pd()->with_host_scale()) {
 auto scalar_storage = utils::downcast<
 const dnnl::impl::host_scalar_memory_storage_t *>(&scale);
-scalar_storage->get_scalar_value(
+auto status = scalar_storage->get_scalar_value(
 &scalar_scale, scale_mdw.data_type_size());
+assert(status == status::success);
+if (status != status::success) return status;
 scalar_scale = dnnl::impl::cpu::io::load_float_value(
 pd()->scale_md()->data_type, &scalar_scale, 0);
 inv_scalar_scale = 1. / scalar_scale;
4 changes: 2 additions & 2 deletions tests/gtests/internals/test_sdpa.cpp
@@ -361,7 +361,7 @@ std::string print_row(const sdpa_dims_t &p) {
 ss << "|" << p.seq_len.kv;
 ss << "|" << p.seq_len.q;
 ss << "|" << p.key.dt;
-if (!(p.key.dt == mdt::f16 || p.value.dt == mdt::bf16)
+if (!(p.key.dt == mdt::f16 || p.key.dt == mdt::bf16)
 && p.qtype != quantize_type::no_quantization) {
 ss << "/" << p.key.sdt;
 ss << "/" << p.key.zpdt;
@@ -562,7 +562,7 @@ sdpa_tensors_t get_descriptors(dnnl::engine &eng, dnnl::stream &strm,

 memory::dims mask_sz;
 switch (p.mask.type) {
-case mask_type::no_mask: mask_sz = {};
+case mask_type::no_mask: mask_sz = {}; break;
 case mask_type::oneD: mask_sz = {1, 1, 1, p.seq_len.kv}; break;
 case mask_type::causal_br:
 case mask_type::causal_tl:
2 changes: 1 addition & 1 deletion third_party/ngen/ngen_auto_swsb.hpp
@@ -1794,7 +1794,7 @@ PVCWARWA analyzePVCWARWA(HW hw, Program &program, BasicBlock &bb, int phase,
 }

 // Case 2: walk forward, looking for a new target send instruction.
-auto eligibleSend = [=, &program](uint32_t inum) {
+auto eligibleSend = [=, &program, &dep](uint32_t inum) {
 auto &insn = program[inum];
 if (inum != dep.inum && insn.predicated())
 return false;
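The ngen_auto_swsb.hpp change adds an explicit &dep to the capture list, so the lambda refers to the caller's dep rather than copying it through the [=] default, presumably to avoid duplicating the record in the closure. A minimal sketch of the difference, with a hypothetical dependency struct rather than the nGEN types:

#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the dependency record, for illustration only.
struct dependency {
    uint32_t inum;
};

int main() {
    dependency dep {5};

    // [=] copies dep into the closure; the lambda sees a snapshot taken here.
    auto byCopy = [=](uint32_t inum) { return inum != dep.inum; };

    // [=, &dep] keeps copy capture as the default but takes dep by reference:
    // nothing is copied, and later updates to dep are visible inside the lambda.
    auto byRef = [=, &dep](uint32_t inum) { return inum != dep.inum; };

    dep.inum = 7;
    std::cout << byCopy(7) << " " << byRef(7) << "\n"; // prints "1 0"
    return 0;
}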