From 8de7987c9cb5fccd9e5c4d2c403d1c4a893a6309 Mon Sep 17 00:00:00 2001 From: Aditya K Kamath Date: Tue, 22 Sep 2020 21:50:44 +0530 Subject: [PATCH] Change addresses to 64-bit. Fixes errors with generic addressing --- src/abstract_hardware_model.h | 4 +-- src/cuda-sim/instructions.cc | 50 +++++++++++++++++------------------ 2 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/abstract_hardware_model.h b/src/abstract_hardware_model.h index 49f3e9f90..c012de0d8 100644 --- a/src/abstract_hardware_model.h +++ b/src/abstract_hardware_model.h @@ -75,8 +75,8 @@ enum AdaptiveCache { FIXED = 0, ADAPTIVE_VOLTA = 1 }; typedef unsigned long long new_addr_type; typedef unsigned long long cudaTextureObject_t; -typedef unsigned address_type; -typedef unsigned addr_t; +typedef unsigned long long address_type; +typedef unsigned long long addr_t; // the following are operations the timing model can see #define SPECIALIZED_UNIT_NUM 8 diff --git a/src/cuda-sim/instructions.cc b/src/cuda-sim/instructions.cc index 8936fa80e..0ab5a34f1 100644 --- a/src/cuda-sim/instructions.cc +++ b/src/cuda-sim/instructions.cc @@ -398,8 +398,8 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op, // global memory - g[4], g[$r0] mem = thread->get_global_memory(); type_info_key::type_decode(opType, size, t); - mem->read(result.u32, size / 8, &finalResult.u128); - thread->m_last_effective_address = result.u32; + mem->read(result.u64, size / 8, &finalResult.u128); + thread->m_last_effective_address = result.u64; thread->m_last_memory_space = global_space; if (opType == S16_TYPE || opType == S32_TYPE) @@ -408,8 +408,8 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op, // shared memory - s[4], s[$r0] mem = thread->m_shared_mem; type_info_key::type_decode(opType, size, t); - mem->read(result.u32, size / 8, &finalResult.u128); - thread->m_last_effective_address = result.u32; + mem->read(result.u64, size / 8, &finalResult.u128); + 
thread->m_last_effective_address = result.u64; thread->m_last_memory_space = shared_space; if (opType == S16_TYPE || opType == S32_TYPE) @@ -418,9 +418,9 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op, // const memory - ce0c1[4], ce0c1[$r0] mem = thread->get_global_memory(); type_info_key::type_decode(opType, size, t); - mem->read((result.u32 + op.get_const_mem_offset()), size / 8, + mem->read((result.u64 + op.get_const_mem_offset()), size / 8, &finalResult.u128); - thread->m_last_effective_address = result.u32; + thread->m_last_effective_address = result.u64; thread->m_last_memory_space = const_space; if (opType == S16_TYPE || opType == S32_TYPE) sign_extend(finalResult, size, dstInfo); @@ -428,8 +428,8 @@ ptx_reg_t ptx_thread_info::get_operand_value(const operand_info &op, // local memory - l0[4], l0[$r0] mem = thread->m_local_mem; type_info_key::type_decode(opType, size, t); - mem->read(result.u32, size / 8, &finalResult.u128); - thread->m_last_effective_address = result.u32; + mem->read(result.u64, size / 8, &finalResult.u128); + thread->m_last_effective_address = result.u64; thread->m_last_memory_space = local_space; if (opType == S16_TYPE || opType == S32_TYPE) sign_extend(finalResult, size, dstInfo); @@ -748,8 +748,8 @@ void ptx_thread_info::set_operand_value(const operand_info &dst, mem = thread->get_global_memory(); type_info_key::type_decode(type, size, t); - mem->write(dstData.u32, size / 8, &data.u128, thread, pI); - thread->m_last_effective_address = dstData.u32; + mem->write(dstData.u64, size / 8, &data.u128, thread, pI); + thread->m_last_effective_address = dstData.u64; thread->m_last_memory_space = global_space; } @@ -759,8 +759,8 @@ void ptx_thread_info::set_operand_value(const operand_info &dst, mem = thread->m_shared_mem; type_info_key::type_decode(type, size, t); - mem->write(dstData.u32, size / 8, &data.u128, thread, pI); - thread->m_last_effective_address = dstData.u32; + mem->write(dstData.u64, size / 8, &data.u128, 
thread, pI); + thread->m_last_effective_address = dstData.u64; thread->m_last_memory_space = shared_space; } @@ -770,8 +770,8 @@ void ptx_thread_info::set_operand_value(const operand_info &dst, mem = thread->m_local_mem; type_info_key::type_decode(type, size, t); - mem->write(dstData.u32, size / 8, &data.u128, thread, pI); - thread->m_last_effective_address = dstData.u32; + mem->write(dstData.u64, size / 8, &data.u128, thread, pI); + thread->m_last_effective_address = dstData.u64; thread->m_last_memory_space = local_space; } @@ -3367,7 +3367,7 @@ void ld_exec(const ptx_instruction *pI, ptx_thread_info *thread) { unsigned vector_spec = pI->get_vector(); memory_space *mem = NULL; - addr_t addr = src1_data.u32; + addr_t addr = src1_data.u64; decode_space(space, thread, src1, mem, addr); @@ -3440,7 +3440,7 @@ void mma_st_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) { memory_space_t space = pI->get_space(); memory_space *mem = NULL; - addr_t addr = addr_reg.u32; + addr_t addr = addr_reg.u64; new_addr_type mem_txn_addr[MAX_ACCESSES_PER_INSN_PER_THREAD]; int num_mem_txn = 0; @@ -3455,7 +3455,7 @@ void mma_st_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) { type_info_key::type_decode(type, size, t); if (core->get_gpu()->gpgpu_ctx->debug_tensorcore) - printf("mma_st: thrd=%d, addr=%x, fp(size=%zu), stride=%d\n", thrd, - addr_reg.u32, size, src2_data.u32); + printf("mma_st: thrd=%d, addr=%llx, fp(size=%zu), stride=%d\n", thrd, + addr_reg.u64, size, src2_data.u32); addr_t new_addr = addr + thread_group_offset(thrd, wmma_type, wmma_layout, type, stride) * size / 8; @@ -3560,7 +3560,7 @@ void mma_ld_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) { memory_space_t space = pI->get_space(); memory_space *mem = NULL; - addr_t addr = src1_data.u32; + addr_t addr = src1_data.u64; smid = thread->get_hw_sid(); if (whichspace(addr) == shared_space) { addr = generic_to_shared(smid, addr); @@ -3573,7 +3573,7 @@ void mma_ld_impl(const ptx_instruction *pI, core_t *core, warp_inst_t &inst) { ptx_reg_t 
data[16]; if (core->get_gpu()->gpgpu_ctx->debug_tensorcore) - printf("mma_ld: thrd=%d,addr=%x, fpsize=%zu, stride=%d\n", thrd, - src1_data.u32, size, src2_data.u32); + printf("mma_ld: thrd=%d,addr=%llx, fpsize=%zu, stride=%d\n", thrd, + src1_data.u64, size, src2_data.u32); addr_t new_addr = addr + thread_group_offset(thrd, wmma_type, wmma_layout, type, stride) * @@ -5667,7 +5667,7 @@ void sst_impl(const ptx_instruction *pI, ptx_thread_info *thread) { memory_space_t space = pI->get_space(); memory_space *mem = NULL; addr_t addr = - src2_data.u32 * 4; // this assumes sstarr memory starts at address 0 + src2_data.u64 * 4; // this assumes sstarr memory starts at address 0 ptx_cta_info *cta_info = thread->m_cta_info; decode_space(space, thread, src1, mem, addr); @@ -5707,7 +5707,7 @@ void sst_impl(const ptx_instruction *pI, ptx_thread_info *thread) { // squeeze the zeros out of the array and store data back into original // array mem = NULL; - addr = src1_data.u32; + addr = src1_data.u64; space.set_type(global_space); decode_space(space, thread, src1, mem, addr); // store nonzero entries and indices @@ -5754,7 +5754,7 @@ void st_impl(const ptx_instruction *pI, ptx_thread_info *thread) { unsigned vector_spec = pI->get_vector(); memory_space *mem = NULL; - addr_t addr = addr_reg.u32; + addr_t addr = addr_reg.u64; decode_space(space, thread, dst, mem, addr); @@ -6490,13 +6490,13 @@ ptx_reg_t srcOperandModifiers(ptx_reg_t opData, operand_info opInfo, if (opInfo.get_addr_space() == global_space) { mem = thread->get_global_memory(); type_info_key::type_decode(type, size, t); - mem->read(opData.u32, size / 8, &result.u64); + mem->read(opData.u64, size / 8, &result.u64); if (type == S16_TYPE || type == S32_TYPE) sign_extend(result, size, dstInfo); } else if (opInfo.get_addr_space() == shared_space) { mem = thread->m_shared_mem; type_info_key::type_decode(type, size, t); - mem->read(opData.u32, size / 8, &result.u64); + mem->read(opData.u64, size / 8, &result.u64); if (type == S16_TYPE || type == S32_TYPE) sign_extend(result, size, dstInfo); @@ 
-6505,7 +6505,7 @@ ptx_reg_t srcOperandModifiers(ptx_reg_t opData, operand_info opInfo, mem = thread->get_global_memory(); type_info_key::type_decode(type, size, t); - mem->read((opData.u32 + opInfo.get_const_mem_offset()), size / 8, + mem->read((opData.u64 + opInfo.get_const_mem_offset()), size / 8, &result.u64); if (type == S16_TYPE || type == S32_TYPE)