diff --git a/libcuda/cuda_runtime_api.cc b/libcuda/cuda_runtime_api.cc index 9bdb993a9..18726eb0a 100644 --- a/libcuda/cuda_runtime_api.cc +++ b/libcuda/cuda_runtime_api.cc @@ -112,6 +112,7 @@ #include #include #include +#include #ifdef OPENGL_SUPPORT #define GL_GLEXT_PROTOTYPES #ifdef __APPLE__ @@ -1453,7 +1454,7 @@ void extract_code_using_cuobjdump(){ //! Read file into char* //TODO: convert this to C++ streams, will be way cleaner -char* readfile (const std::string filename){ +std::unique_ptr readfile (const std::string filename){ assert (filename != ""); FILE* fp = fopen(filename.c_str(),"r"); if (!fp) { @@ -1467,8 +1468,8 @@ char* readfile (const std::string filename){ filesize = ftell (fp); fseek (fp, 0, SEEK_SET); // allocate and copy the entire ptx - char* ret = (char*)malloc((filesize +1)* sizeof(char)); - fread(ret,1,filesize,fp); + std::unique_ptr ret(new char[filesize +1]); + fread(ret.get(),1,filesize,fp); ret[filesize]='\0'; fclose(fp); return ret; @@ -1503,7 +1504,7 @@ std::list pruneSectionList(std::list cuobj //Find the highest capability (that is lower than the forced maximum) for each cubin file //and set it in cuobjdumpSectionMap. Do this only for ptx sections std::map cuobjdumpSectionMap; - int min_ptx_capability_found=0; + unsigned min_ptx_capability_found=0; for ( std::list::iterator iter = cuobjdumpSectionList.begin(); iter != cuobjdumpSectionList.end(); iter++){ @@ -1540,7 +1541,7 @@ std::list pruneSectionList(std::list cuobj //! Merge all PTX sections that have a specific identifier into one file std::list mergeMatchingSections(std::list cuobjdumpSectionList, std::string identifier){ - const char *ptxcode = ""; + std::unique_ptr ptxcode; std::list::iterator old_iter; cuobjdumpPTXSection* old_ptxsection = NULL; cuobjdumpPTXSection* ptxsection; @@ -1560,9 +1561,9 @@ std::list mergeMatchingSections(std::list // Append all the PTX from the last PTX section into the current PTX section // Add 50 to ptxcode to ignore the information regarding version/target/address_size - if (strlen(ptxcode) >= 50) { + if ((ptxcode.get() != NULL) && strlen(ptxcode.get()) >= 50) { FILE *ptxfile = fopen((ptxsection->getPTXfilename()).c_str(), "a"); - fprintf(ptxfile, "%s", ptxcode + 50); + fprintf(ptxfile, "%s", ptxcode.get() + 50); fclose(ptxfile); } @@ -1718,7 +1719,7 @@ void cuobjdumpParseBinary(unsigned int handle){ cuobjdumpPTXSection* ptx = findPTXSection(fname); symbol_table *symtab; - char *ptxcode; + std::unique_ptr ptxcode; const char *override_ptx_name = getenv("PTX_SIM_KERNELFILE"); if (override_ptx_name == NULL or getenv("PTX_SIM_USE_PTX_FILE") == NULL) { ptxcode = readfile(ptx->getPTXfilename()); @@ -1726,24 +1727,29 @@ void cuobjdumpParseBinary(unsigned int handle){ printf("GPGPU-Sim PTX: overriding embedded ptx with '%s' (PTX_SIM_USE_PTX_FILE is set)\n", override_ptx_name); ptxcode = readfile(override_ptx_name); } - if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) { + const char *override_ptxplus_name = getenv("PTXPLUS_SIM_KERNELFILE"); + if((override_ptxplus_name != NULL) && (strlen(override_ptxplus_name) > 0)) { + // Use override PTXPLUS file, perhaps with cuobjdumped PTX, perhaps with above overloaded PTX + // This ignores .convert_to_ptxplus(), assuming that if the ENV variable is specified, then it should be used + printf("GPGPU-Sim PTX: overriding ptxplus with '%s' (PTXPLUS_SIM_KERNELFILE is set)\n", override_ptxplus_name); + std::unique_ptr ptxplus_str = readfile(override_ptxplus_name); + symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str.get(), handle); + } else if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) { cuobjdumpELFSection* elfsection = findELFSection(ptx->getIdentifier()); assert (elfsection!= NULL); - char *ptxplus_str = gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus( + std::string ptxplus_str = gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus( ptx->getPTXfilename(), elfsection->getELFfilename(), elfsection->getSASSfilename()); - symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str, handle); - printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle); - context->add_binary(symtab, handle); - gpgpu_ptxinfo_load_from_string( ptxcode, handle, max_capability ); - delete[] ptxplus_str; + symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str.c_str(), handle); } else { - symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxcode, handle); - printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle); - context->add_binary(symtab, handle); - gpgpu_ptxinfo_load_from_string( ptxcode, handle, max_capability ); + symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxcode.get(), handle); } + + printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle); + context->add_binary(symtab, handle); + gpgpu_ptxinfo_load_from_string( ptxcode.get(), handle, max_capability ); + load_static_globals(symtab,STATIC_ALLOC_LIMIT,0xFFFFFFFF,context->get_device()->get_gpgpu()); load_constants(symtab,STATIC_ALLOC_LIMIT,context->get_device()->get_gpgpu()); name_symtab[fname] = symtab; diff --git a/src/cuda-sim/ptx_loader.cc b/src/cuda-sim/ptx_loader.cc index 6c1b59549..c2a1694dd 100644 --- a/src/cuda-sim/ptx_loader.cc +++ b/src/cuda-sim/ptx_loader.cc @@ -99,7 +99,7 @@ void print_ptx_file( const char *p, unsigned source_num, const char *filename ) fflush(stdout); } -char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename) +std::string gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename) { printf("GPGPU-Sim PTX: converting EMBEDDED .ptx file to ptxplus \n"); @@ -131,8 +131,6 @@ char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilenam } fileStream.close(); - char* ptxplus_str = new char [strlen(text.c_str())+1]; - strcpy(ptxplus_str, text.c_str()); if (!m_ptx_save_converted_ptxplus){ char rm_commandline[1024]; @@ -148,7 +146,7 @@ char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilenam } printf("GPGPU-Sim PTX: DONE converting EMBEDDED .ptx file to ptxplus \n"); - return ptxplus_str; + return text; } diff --git a/src/cuda-sim/ptx_loader.h b/src/cuda-sim/ptx_loader.h index d3d0c923b..a176c73c9 100644 --- a/src/cuda-sim/ptx_loader.h +++ b/src/cuda-sim/ptx_loader.h @@ -33,7 +33,7 @@ extern bool g_override_embedded_ptx; class symbol_table *gpgpu_ptx_sim_load_ptx_from_string( const char *p, unsigned source_num ); void gpgpu_ptxinfo_load_from_string( const char *p_for_info, unsigned source_num, unsigned sm_version=20 ); -char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptx_str, const std::string sass_str, const std::string elf_str); +std::string gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptx_str, const std::string sass_str, const std::string elf_str); bool keep_intermediate_files(); #endif