diff --git a/libcuda/cuda_runtime_api.cc b/libcuda/cuda_runtime_api.cc
index 9bdb993a9..18726eb0a 100644
--- a/libcuda/cuda_runtime_api.cc
+++ b/libcuda/cuda_runtime_api.cc
@@ -112,6 +112,7 @@
 #include <regex>
 #include <sstream>
 #include <fstream>
+#include <memory>
 #ifdef OPENGL_SUPPORT
 #define GL_GLEXT_PROTOTYPES
 #ifdef __APPLE__
@@ -1453,7 +1454,7 @@ void extract_code_using_cuobjdump(){
 
 //! Read file into char*
 //TODO: convert this to C++ streams, will be way cleaner
-char* readfile (const std::string filename){
+std::unique_ptr<char[]> readfile (const std::string filename){
 	assert (filename != "");
 	FILE* fp = fopen(filename.c_str(),"r");
 	if (!fp) {
@@ -1467,8 +1468,8 @@ char* readfile (const std::string filename){
 	filesize = ftell (fp);
 	fseek (fp, 0, SEEK_SET);
 	// allocate and copy the entire ptx
-	char* ret = (char*)malloc((filesize +1)* sizeof(char));
-	fread(ret,1,filesize,fp);
+	std::unique_ptr<char[]> ret(new char[filesize +1]);
+	fread(ret.get(),1,filesize,fp);
 	ret[filesize]='\0';
 	fclose(fp);
 	return ret;
@@ -1503,7 +1504,7 @@ std::list<cuobjdumpSection*> pruneSectionList(std::list<cuobjdumpSection*> cuobj
 	//Find the highest capability (that is lower than the forced maximum) for each cubin file
 	//and set it in cuobjdumpSectionMap. Do this only for ptx sections
 	std::map<std::string, unsigned> cuobjdumpSectionMap;
-	int min_ptx_capability_found=0;
+	unsigned min_ptx_capability_found=0;
 	for (	std::list<cuobjdumpSection*>::iterator iter = cuobjdumpSectionList.begin();
 			iter != cuobjdumpSectionList.end();
 			iter++){
@@ -1540,7 +1541,7 @@ std::list<cuobjdumpSection*> pruneSectionList(std::list<cuobjdumpSection*> cuobj
 
 //! Merge all PTX sections that have a specific identifier into one file
 std::list<cuobjdumpSection*> mergeMatchingSections(std::list<cuobjdumpSection*> cuobjdumpSectionList, std::string identifier){
-	const char *ptxcode = "";
+	std::unique_ptr<char[]> ptxcode;
 	std::list<cuobjdumpSection*>::iterator old_iter;
 	cuobjdumpPTXSection* old_ptxsection = NULL;
 	cuobjdumpPTXSection* ptxsection;
@@ -1560,9 +1561,9 @@ std::list<cuobjdumpSection*> mergeMatchingSections(std::list<cuobjdumpSection*>
 
 			// Append all the PTX from the last PTX section into the current PTX section
 			// Add 50 to ptxcode to ignore the information regarding version/target/address_size
-			if (strlen(ptxcode) >= 50) {
+			if ((ptxcode.get() != NULL) && strlen(ptxcode.get()) >= 50) {
 				FILE *ptxfile = fopen((ptxsection->getPTXfilename()).c_str(), "a");
-				fprintf(ptxfile, "%s", ptxcode + 50);
+				fprintf(ptxfile, "%s", ptxcode.get() + 50);
 				fclose(ptxfile);
 			}
 
@@ -1718,7 +1719,7 @@ void cuobjdumpParseBinary(unsigned int handle){
 
 	cuobjdumpPTXSection* ptx = findPTXSection(fname);
 	symbol_table *symtab;
-	char *ptxcode;
+	std::unique_ptr<char[]> ptxcode;
 	const char *override_ptx_name = getenv("PTX_SIM_KERNELFILE"); 
 	if (override_ptx_name == NULL or getenv("PTX_SIM_USE_PTX_FILE") == NULL) {
 		ptxcode = readfile(ptx->getPTXfilename());
@@ -1726,24 +1727,29 @@ void cuobjdumpParseBinary(unsigned int handle){
 		printf("GPGPU-Sim PTX: overriding embedded ptx with '%s' (PTX_SIM_USE_PTX_FILE is set)\n", override_ptx_name);
 		ptxcode = readfile(override_ptx_name);
 	}
-	if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) {
+    const char *override_ptxplus_name = getenv("PTXPLUS_SIM_KERNELFILE");
+    if((override_ptxplus_name != NULL) && (strlen(override_ptxplus_name) > 0)) {
+        // Use the override PTXPLUS file, paired with either the cuobjdump-extracted PTX or the overridden PTX loaded above.
+        // This ignores convert_to_ptxplus(): if the environment variable is set, it is assumed the override should be used.
+        printf("GPGPU-Sim PTX: overriding ptxplus with '%s' (PTXPLUS_SIM_KERNELFILE is set)\n", override_ptxplus_name);
+        std::unique_ptr<char[]> ptxplus_str = readfile(override_ptxplus_name);
+        symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str.get(), handle);
+	} else if(context->get_device()->get_gpgpu()->get_config().convert_to_ptxplus() ) {
 		cuobjdumpELFSection* elfsection = findELFSection(ptx->getIdentifier());
 		assert (elfsection!= NULL);
-		char *ptxplus_str = gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(
+	    std::string ptxplus_str = gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(
 				ptx->getPTXfilename(),
 				elfsection->getELFfilename(),
 				elfsection->getSASSfilename());
-		symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str, handle);
-		printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle);
-		context->add_binary(symtab, handle);
-		gpgpu_ptxinfo_load_from_string( ptxcode, handle, max_capability );
-		delete[] ptxplus_str;
+		symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxplus_str.c_str(), handle);
 	} else {
-		symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxcode, handle);
-		printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle);
-		context->add_binary(symtab, handle);
-		gpgpu_ptxinfo_load_from_string( ptxcode, handle, max_capability );
+		symtab=gpgpu_ptx_sim_load_ptx_from_string(ptxcode.get(), handle);
 	}
+    
+    printf("Adding %s with cubin handle %u\n", ptx->getPTXfilename().c_str(), handle);
+    context->add_binary(symtab, handle);
+    gpgpu_ptxinfo_load_from_string( ptxcode.get(), handle, max_capability );
+
 	load_static_globals(symtab,STATIC_ALLOC_LIMIT,0xFFFFFFFF,context->get_device()->get_gpgpu());
 	load_constants(symtab,STATIC_ALLOC_LIMIT,context->get_device()->get_gpgpu());
 	name_symtab[fname] = symtab;
diff --git a/src/cuda-sim/ptx_loader.cc b/src/cuda-sim/ptx_loader.cc
index 6c1b59549..c2a1694dd 100644
--- a/src/cuda-sim/ptx_loader.cc
+++ b/src/cuda-sim/ptx_loader.cc
@@ -99,7 +99,7 @@ void print_ptx_file( const char *p, unsigned source_num, const char *filename )
    fflush(stdout);
 }
 
-char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename)
+std::string gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilename, const std::string elffilename, const std::string sassfilename)
 {
 
 	printf("GPGPU-Sim PTX: converting EMBEDDED .ptx file to ptxplus \n");
@@ -131,8 +131,6 @@ char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilenam
 	}
 	fileStream.close();
 
-	char* ptxplus_str = new char [strlen(text.c_str())+1];
-	strcpy(ptxplus_str, text.c_str());
 
 	if (!m_ptx_save_converted_ptxplus){
 		char rm_commandline[1024];
@@ -148,7 +146,7 @@ char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptxfilenam
 	}
 	printf("GPGPU-Sim PTX: DONE converting EMBEDDED .ptx file to ptxplus \n");
 
-	return ptxplus_str;
+	return text;
 }
 
 
diff --git a/src/cuda-sim/ptx_loader.h b/src/cuda-sim/ptx_loader.h
index d3d0c923b..a176c73c9 100644
--- a/src/cuda-sim/ptx_loader.h
+++ b/src/cuda-sim/ptx_loader.h
@@ -33,7 +33,7 @@ extern bool g_override_embedded_ptx;
  
 class symbol_table *gpgpu_ptx_sim_load_ptx_from_string( const char *p, unsigned source_num );
 void gpgpu_ptxinfo_load_from_string( const char *p_for_info, unsigned source_num, unsigned sm_version=20 );
-char* gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptx_str, const std::string sass_str, const std::string elf_str);
+std::string gpgpu_ptx_sim_convert_ptx_and_sass_to_ptxplus(const std::string ptx_str, const std::string sass_str, const std::string elf_str);
 bool keep_intermediate_files();
 
 #endif