Skip to content

Dev override ptxplus #70

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 25 additions & 19 deletions libcuda/cuda_runtime_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@
#include <regex>
#include <sstream>
#include <fstream>
#include <memory>
#ifdef OPENGL_SUPPORT
#define GL_GLEXT_PROTOTYPES
#ifdef __APPLE__
Expand Down Expand Up @@ -1453,7 +1454,7 @@ void extract_code_using_cuobjdump(){

//! Read file into char*
//TODO: convert this to C++ streams, will be way cleaner
char* readfile (const std::string filename){
std::unique_ptr<char[]> readfile (const std::string filename){
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the motivation for removing the char*?

Copy link
Contributor Author

@pigrew pigrew Oct 9, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There was a memory leak in the calling code. The choices were either to use the auto-freeing unique_ptr or to add a free to the calling code. It's a coding-convention question whether the code should use malloc/free or the "modern" C++ features.

Also, it looks like I had commented that there was some "malloc" mixed with "delete", or "new" mixed with "free". This is against the C++ standard, but it probably works with most standard libraries.

assert (filename != "");
FILE* fp = fopen(filename.c_str(),"r");
if (!fp) {
Expand All @@ -1467,8 +1468,8 @@ char* readfile (const std::string filename){
filesize = ftell (fp);
fseek (fp, 0, SEEK_SET);
// allocate and copy the entire ptx
char* ret = (char*)malloc((filesize +1)* sizeof(char));
fread(ret,1,filesize,fp);
std::unique_ptr<char[]> ret(new char[filesize +1]);
fread(ret.get(),1,filesize,fp);
ret[filesize]='\0';
fclose(fp);
return ret;
Expand Down Expand Up @@ -1503,7 +1504,7 @@ std::list<cuobjdumpSection*> pruneSectionList(std::list<cuobjdumpSection*> cuobj
//Find the highest capability (that is lower than the forced maximum) for each cubin file
//and set it in cuobjdumpSectionMap. Do this only for ptx sections
std::map<std::string, unsigned> cuobjdumpSectionMap;
int min_ptx_capability_found=0;
unsigned min_ptx_capability_found=0;
for ( std::list<cuobjdumpSection*>::iterator iter = cuobjdumpSectionList.begin();
iter != cuobjdumpSectionList.end();
iter++){
Expand Down Expand Up @@ -1540,7 +1541,7 @@ std::list<cuobjdumpSection*> pruneSectionList(std::list<cuobjdumpSection*> cuobj

//! Merge all PTX sections that have a specific identifier into one file
std::list<cuobjdumpSection*> mergeMatchingSections(std::list<cuobjdumpSection*> cuobjdumpSectionList, std::string identifier){
const char *ptxcode = "";
std::unique_ptr<char[]> ptxcode;
std::list<cuobjdumpSection*>::iterator old_iter;
cuobjdumpPTXSection* old_ptxsection = NULL;
cuobjdumpPTXSection* ptxsection;
Expand All @@ -1560,9 +1561,9 @@ std::list<cuobjdumpSection*> mergeMatchingSections(std::list<cuobjdumpSection*>

// Append all the PTX from the last PTX section into the current PTX section
// Add 50 to ptxcode to ignore the information regarding version/target/address_size
if (strlen(ptxcode) >= 50) {
if ((ptxcode.get() != NULL) && strlen(ptxcode.get()) >= 50) {
FILE *ptxfile = fopen((ptxsection->getPTXfilename()).c_str(), "a");
fprintf(ptxfile, "%s", ptxcode + 50);
fprintf(ptxfile, "%s", ptxcode.get() + 50);
fclose(ptxfile);
}

Expand Down Expand Up @@ -1718,32 +1719,37 @@ void cuobjdumpParseBinary(unsigned int handle){

cuobjdumpPTXSection* ptx = findPTXSection(fname);
symbol_table *symtab;
char *ptxcode;
std::unique_ptr<char[]> ptxcode;
const char *override_ptx_name = getenv("PTX_SIM_KERNELFILE");
if (override_ptx_name == NULL or getenv("PTX_SIM_USE_PTX_FILE") == NULL) {
ptxcode = readfile(ptx->getPTXfilename());
} else {
printf("GPGPU-Sim PTX: overriding embedded ptx with '%s' (PTX_SIM_USE_PTX_FILE is set)\n", override_ptx_name);
ptxcode = readfile(override_ptx_name);
}
if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) {
const char *override_ptxplus_name = getenv("PTXPLUS_SIM_KERNELFILE");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure we need another variable for this - we should probably just use the PTX_SIM_KERNELFILE variable and not override, even if "context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus()" is true. Basically, just change the "if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) {" at line 1729 to "else if".

if((override_ptxplus_name != NULL) && (strlen(override_ptxplus_name) > 0)) {
// Use override PTXPLUS file, perhaps with cuobjdumped PTX, perhaps with above overloaded PTX
// This ignores .convert_to_ptxplus(), assuming that if the ENV variable is specified, then it should be used
printf("GPGPU-Sim PTX: overriding ptxplus with '%s' (PTXPLUS_SIM_KERNELFILE is set)\n", override_ptxplus_name);
std::unique_ptr<char[]> ptxplus_str = readfile(override_ptxplus_name);
symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str.get(), handle);
} else if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) {
cuobjdumpELFSection* elfsection = findELFSection(ptx->getIdentifier());
assert (elfsection!= NULL);
char *ptxplus_str = gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(
std::string ptxplus_str = gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(
ptx->getPTXfilename(),
elfsection->getELFfilename(),
elfsection->getSASSfilename());
symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str, handle);
printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle);
context->add_binary(symtab, handle);
gpgpu_ptxinfo_load_from_string( ptxcode, handle, max_capability );
delete[] ptxplus_str;
symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str.c_str(), handle);
} else {
symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxcode, handle);
printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle);
context->add_binary(symtab, handle);
gpgpu_ptxinfo_load_from_string( ptxcode, handle, max_capability );
symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxcode.get(), handle);
}

printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle);
context->add_binary(symtab, handle);
gpgpu_ptxinfo_load_from_string( ptxcode.get(), handle, max_capability );

load_static_globals(symtab,STATIC_ALLOC_LIMIT,0xFFFFFFFF,context->get_device()->get_gpgpu());
load_constants(symtab,STATIC_ALLOC_LIMIT,context->get_device()->get_gpgpu());
name_symtab[fname] = symtab;
Expand Down
6 changes: 2 additions & 4 deletions src/cuda-sim/ptx_loader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ void print_ptx_file( const char *p, unsigned source_num, const char *filename )
fflush(stdout);
}

char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename)
std::string gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename)
{

printf("GPGPU-Sim PTX: converting EMBEDDED .ptx file to ptxplus \n");
Expand Down Expand Up @@ -131,8 +131,6 @@ char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilenam
}
fileStream.close();

char* ptxplus_str = new char [strlen(text.c_str())+1];
strcpy(ptxplus_str, text.c_str());

if (!m_ptx_save_converted_ptxplus){
char rm_commandline[1024];
Expand All @@ -148,7 +146,7 @@ char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilenam
}
printf("GPGPU-Sim PTX: DONE converting EMBEDDED .ptx file to ptxplus \n");

return ptxplus_str;
return text;
}


Expand Down
2 changes: 1 addition & 1 deletion src/cuda-sim/ptx_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ extern bool g_override_embedded_ptx;

class symbol_table *gpgpu_ptx_sim_load_ptx_from_string( const char *p, unsigned source_num );
void gpgpu_ptxinfo_load_from_string( const char *p_for_info, unsigned source_num, unsigned sm_version=20 );
char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptx_str, const std::string sass_str, const std::string elf_str);
std::string gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptx_str, const std::string sass_str, const std::string elf_str);
bool keep_intermediate_files();

#endif