-
Notifications
You must be signed in to change notification settings - Fork 576
Open
Description
Consider CUDA program that returns 64-bit integer value:
#include <iostream>
#include <stdint.h>
// Ensure printing of CUDA runtime errors to console
#define CUB_STDERR
#include <cub/cub.cuh>
const uint64_t C = UINT64_C(123456789012345);
__global__ void testInt64Kernel(uint64_t *v)
{
const int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index == 0)
{
*v = C;
}
}
int main(const int argc, const char *argv[])
{
uint64_t v = 0;
uint64_t *d_v;
CubDebugExit(cudaMalloc(&d_v, sizeof(uint64_t)));
testInt64Kernel<<<1, 1>>>(d_v);
CubDebugExit(cudaMemcpy(&v, d_v, sizeof(uint64_t), cudaMemcpyDeviceToHost));
printf("C: %016llx (%llu)\n", C, C);
printf("v: %016llx (%llu)\n", v, v);
return 0;
}
It outputs:
C: 00007048860ddf79 (123456789012345)
v: ffffffff860ddf79 (18446744071663640441)
i.e. high 32 bits of 64-bit value are all ones.
The PTX contains 64-bit literal integer:
//int64_test.cu:14 *v = C;
.loc 1 14 9
mov.u64 %rd3, 123456789012345;
st.global.u64 [%rd2], %rd3;
I was able to trace it to PTX parser that uses atoi
to parse literal and hence discards high bits:
gpgpu-sim_distribution/src/cuda-sim/ptx.l
Line 273 in a4ce3fe
[-]?[0-9]+U? TC; CHECK_UNSIGNED; yylval->int_value = atoi(yytext); return INT_OPERAND; |
and
gpgpu-sim_distribution/src/cuda-sim/ptx.y
Lines 644 to 646 in a4ce3fe
literal_operand : INT_OPERAND { recognizer->add_literal_int($1); } | |
| FLOAT_OPERAND { recognizer->add_literal_float($1); } | |
| DOUBLE_OPERAND { recognizer->add_literal_double($1); } |
gpgpu-sim_distribution/src/cuda-sim/ptx_parser.cc
Lines 869 to 872 in 90ec339
void ptx_recognizer::add_literal_int(int value) { | |
PTX_PARSE_DPRINTF("add_literal_int"); | |
g_operands.push_back(operand_info(value, gpgpu_ctx)); | |
} |
The fix does not seem trivial though as a lot of things assume INT_OPERAND is of type int.
Metadata
Metadata
Assignees
Labels
No labels