Support pre-Pentium 32-bit CPUs and add prefix-group filtering.

* Build the 32-bit image for i386 instead of i686.
* Probe for CPUID via EFLAGS.ID before executing it; get_cpuid() returns an
  all-zero result when CPUID is absent.
* Add has_smep(), has_pse() and has_wp() and only enable the corresponding
  CR4/CR0 bits when the probe succeeds.
* Without PSE, map the image through 4KB page tables placed behind the image
  (each table holds exactly 1024 entries).
* Add the used_prefixes / detect_prefixes command line options; each bit
  selects one prefix group (0: LOCK/REP, 1: segment, 2: operand size,
  3: address size, 4: REX).

(Text before the first "diff --git" header is ignored by git apply / patch.)

diff --git a/src/SConstruct b/src/SConstruct
index 19988c6..4e134ce 100644
--- a/src/SConstruct
+++ b/src/SConstruct
@@ -35,7 +35,7 @@ bare64_env.Append(CCFLAGS=" -m64 -march=x86-64 -mno-red-zone",
                   ASFLAGS=" -felf64")
 
 bare32_env = common_bare_env.Clone(ARCH_NAME="x86_32")
-bare32_env.Append(CCFLAGS=" -m32 -march=i686 -mregparm=3 -fomit-frame-pointer",
+bare32_env.Append(CCFLAGS=" -m32 -march=i386 -mregparm=3 -fomit-frame-pointer",
                   ASFLAGS=" -felf32")
 
 bins = []
diff --git a/src/common/cpuid.cpp b/src/common/cpuid.cpp
index 573a275..3618fc9 100644
--- a/src/common/cpuid.cpp
+++ b/src/common/cpuid.cpp
@@ -2,9 +2,47 @@
 
 #include "cpuid.hpp"
 
+bool cpuid_supported()
+{
+#ifndef __x86_64__
+  // 32-bit CPUs may lack CPUID: it is available only if EFLAGS.ID (bit 21) can be toggled.
+  unsigned int res1, res2;
+  asm volatile( // volatile: the sequence has side effects (cli, EFLAGS writes) and must not be moved or merged
+    "pushfl\n\t"               // Save original interrupt state
+    "cli\n\t"                  // Block interrupts while the stack pointer is being realigned (critical section)
+    "push %%ebp\n\t"           // Save original stack base pointer
+    "mov %%esp,%%ebp\n\t"      // Save original stack pointer
+    "and $-4,%%esp\n\t"        // Align stack
+    "pushfl\n\t"               // Load...
+    "pop %%eax\n\t"            // ... old EFLAGS
+    "mov %%eax,%%ebx\n\t"      // Copy of it for the result check
+    "xor $0x200000,%%eax\n\t"  // Flip the ID bit
+    "push %%eax\n\t"
+    "popfl\n\t"                // Store changed bit into EFLAGS
+    "pushfl\n\t"               // New EFLAGS back on the stack
+    "pop %%eax\n\t"            // Read back whether the bit stuck
+    "mov %%eax, %0\n\t"        // Flipped EFLAGS result
+    "mov %%ebx, %1\n\t"        // Original EFLAGS result
+    "mov %%ebp,%%esp\n\t"      // Restore original stack pointer
+    "pop %%ebp\n\t"            // Restore stack base pointer
+    "popfl"                    // Restore original EFLAGS (including the interrupt state)
+    : "=a" (res1), "=b" (res2));
+  return (((res1 ^ res2) & 0x200000) != 0); // ID bit could be toggled => CPUID is supported
+#else
+  return true; // Always assumed supported on 64-bit builds
+#endif
+}
+
 cpuid_result get_cpuid(uint32_t leaf, uint32_t subleaf)
 {
   cpuid_result res;
 
+  if (!cpuid_supported()) // CPUID not supported?
+  {
+    res.eax = res.ebx = res.edx = res.ecx = 0; // Report an all-zero result instead of faulting
+    return res;
+  }
+
+  // CPUID is supported!
   asm ("cpuid"
        : "=a" (res.eax), "=b" (res.ebx), "=d" (res.edx), "=c" (res.ecx)
@@ -47,3 +85,46 @@ bool has_nx()
   return get_cpuid_max_ext_level() >= 0x80000001
     and (get_cpuid(0x80000001).edx & (1 << 20));
 }
+
+bool has_smep()
+{
+  return get_cpuid_max_std_level() >= 7
+    and (get_cpuid(0x7).ebx & (1 << 7));
+}
+
+bool has_pse()
+{
+  return get_cpuid_max_std_level() >= 1
+    and (get_cpuid(0x1).edx & (1 << 3));
+}
+
+bool has_wp()
+{
+#ifndef __x86_64__
+  // There is no CPUID bit for CR0.WP; probe EFLAGS.AC (bit 18) instead and treat it as a proxy for WP support.
+  unsigned int res1, res2;
+  asm volatile( // volatile: the sequence has side effects (cli, EFLAGS writes) and must not be moved or merged
+    "pushfl\n\t"               // Save original interrupt state
+    "cli\n\t"                  // Block interrupts while the stack pointer is being realigned (critical section)
+    "push %%ebp\n\t"           // Save original stack base pointer
+    "mov %%esp,%%ebp\n\t"      // Save original stack pointer
+    "and $-4,%%esp\n\t"        // Align stack. Important to prevent faulting once AC is set!
+    "pushfl\n\t"               // Load...
+    "pop %%eax\n\t"            // ... old EFLAGS
+    "mov %%eax,%%ebx\n\t"      // Copy of it for the result check
+    "xor $0x40000,%%eax\n\t"   // Flip the AC bit
+    "push %%eax\n\t"
+    "popfl\n\t"                // Store changed bit into EFLAGS
+    "pushfl\n\t"               // New EFLAGS back on the stack
+    "pop %%eax\n\t"            // Read back whether the bit stuck
+    "mov %%eax, %0\n\t"        // Flipped EFLAGS result
+    "mov %%ebx, %1\n\t"        // Original EFLAGS result
+    "mov %%ebp,%%esp\n\t"      // Restore original stack pointer
+    "pop %%ebp\n\t"            // Restore stack base pointer
+    "popfl"                    // Restore original EFLAGS (including the interrupt state)
+    : "=a" (res1), "=b" (res2));
+  return (((res1 ^ res2) & 0x40000) != 0); // AC bit could be toggled => assume the WP bit is supported
+#else
+  return true; // Always assumed supported on 64-bit builds
+#endif
+}
diff --git a/src/common/include/cpuid.hpp b/src/common/include/cpuid.hpp
index 05d6462..9bb4148 100644
--- a/src/common/include/cpuid.hpp
+++ b/src/common/include/cpuid.hpp
@@ -26,3 +26,12 @@ uint32_t get_cpuid_max_ext_level();
 
 // Returns true, if the CPU reports being able to use the NX bit.
 bool has_nx();
+
+// Returns true, if the CPU reports being able to use SMEP.
+bool has_smep();
+
+// Returns true, if the CPU reports being able to use PSE.
+bool has_pse();
+
+// Returns true, if the CPU is assumed to support the WP bit in CR0 (probed via EFLAGS.AC on 32-bit builds).
+bool has_wp();
diff --git a/src/common/include/search.hpp b/src/common/include/search.hpp
index 455b7b7..2e320f0 100644
--- a/src/common/include/search.hpp
+++ b/src/common/include/search.hpp
@@ -13,11 +13,20 @@ struct instruction_bytes {
   {}
 };
 
+class prefix_group_lut {
+public:
+  int8_t data[256]; // Prefix group of each byte value, or -1 if the byte is not treated as a prefix.
+
+  explicit prefix_group_lut(size_t detect_prefixes_); // Bit n of detect_prefixes_ enables detection of prefix group n.
+};
+
 class search_engine {
   instruction_bytes current_;
   size_t increment_at_ = 0;
 
-  const size_t max_prefixes_;
+  const size_t max_prefixes_;  // How many prefix bytes to use at once.
+  const size_t used_prefixes_; // Bit mask of prefix groups candidates may contain.
+  prefix_group_lut group_lut_; // Lookup table that classifies bytes into prefix groups.
 
 public:
 
@@ -37,7 +46,7 @@ class search_engine {
     return current_;
   }
 
-  search_engine(size_t max_prefixes = 0, instruction_bytes const &start = {})
-    : current_(start), max_prefixes_(max_prefixes)
+  search_engine(size_t max_prefixes = 0, size_t used_prefixes = 0xFF, size_t detect_prefixes = 0xFF, instruction_bytes const &start = {})
+    : current_(start), max_prefixes_(max_prefixes), used_prefixes_(used_prefixes), group_lut_(detect_prefixes)
   {}
 };
diff --git a/src/common/search.cpp b/src/common/search.cpp
index 62831c4..15d3e65 100644
--- a/src/common/search.cpp
+++ b/src/common/search.cpp
@@ -4,11 +4,7 @@
 #include "search.hpp"
 #include "util.hpp"
 
-struct prefix_lut {
-  int8_t data[256];
-};
-
-static constexpr int opcode_to_prefix_group(uint8_t byte)
+static constexpr int opcode_to_prefix_group(uint8_t byte, size_t detect_prefixes_)
 {
   int group = -1;
 
@@ -16,7 +12,10 @@ static constexpr int opcode_to_prefix_group(uint8_t byte)
   case 0xF0: // LOCK
   case 0xF2: // REPNE
   case 0xF3: // REP
-    group = 0;
+    if (detect_prefixes_ & (1<<0)) // Group 0 enabled for detection?
+    {
+      group = 0;
+    }
     break;
   case 0x2E: // CS
   case 0x36: // SS
@@ -24,35 +23,41 @@ static constexpr int opcode_to_prefix_group(uint8_t byte)
   case 0x26: // ES
   case 0x64: // FS
   case 0x65: // GS
-    group = 1;
+    if (detect_prefixes_ & (1<<1)) // Group 1 enabled for detection?
+    {
+      group = 1;
+    }
     break;
   case 0x66: // operand size override
-    group = 2;
+    if (detect_prefixes_ & (1<<2)) // Group 2 enabled for detection?
+    {
+      group = 2;
+    }
     break;
   case 0x67: // address size override
-    group = 3;
+    if (detect_prefixes_ & (1<<3)) // Group 3 enabled for detection?
+    {
+      group = 3;
+    }
     break;
   case 0x40 ... 0x4F: // REX prefixes
-    group = 4;
+    if (detect_prefixes_ & (1<<4)) // Group 4 enabled for detection?
+    {
+      group = 4;
+    }
     break;
   }
 
   return group;
 }
 
-static constexpr prefix_lut create_prefix_group_lut()
+prefix_group_lut::prefix_group_lut(size_t detect_prefixes_)
 {
-  prefix_lut group_lut {};
-
-  for (size_t i = 0; i < array_size(group_lut.data); i++) {
-    group_lut.data[i] = (int8_t)opcode_to_prefix_group((uint8_t)i);
+  for (size_t i = 0; i < array_size(data); i++) {
+    data[i] = (int8_t)opcode_to_prefix_group((uint8_t)i,detect_prefixes_);
   }
-
-  return group_lut;
 }
 
-static prefix_lut prefix_group_lut {create_prefix_group_lut()};
-
 // Encapsulates which prefixes are there, where and how many there are.
 struct prefix_state {
   uint8_t count[5] {};          // Count of prefixes in each group.
@@ -75,7 +80,20 @@ struct prefix_state {
       if (c >= 2)
         return true;
     }
+
+    return false;
+  }
 
+  bool has_unused_prefixes(size_t used_prefixes_) const
+  {
+    // True if a prefix from a group whose bit is clear in used_prefixes_ is present.
+    size_t b = 1;
+    for (size_t i = 0; i < array_size(count); i++, b <<= 1)
+    {
+      if (count[i] and ((used_prefixes_ & b) == 0)) // Prefix group present but not to be used?
+        return true;
+    }
+
     return false;
   }
 
@@ -96,12 +114,12 @@ struct prefix_state {
   }
 };
 
-static prefix_state analyze_prefixes(instruction_bytes const &instr)
+static prefix_state analyze_prefixes(prefix_group_lut const &group_lut_, instruction_bytes const &instr)
 {
   prefix_state state;
 
   for (size_t i = 0; i < sizeof(instr.raw); i++) {
-    int group = prefix_group_lut.data[instr.raw[i]];
+    int group = group_lut_.data[instr.raw[i]];
     if (group < 0)
       break;
 
@@ -138,12 +156,14 @@ bool search_engine::find_next_candidate()
     goto again;
   }
 
-  auto const state = analyze_prefixes(current_);
+  auto const state = analyze_prefixes(group_lut_, current_);
 
   // Duplicated prefixes make the search space explode without generating
   // insight. Also enforce order on prefixes to further reduce search space.
+  // And also filter out prefixes that are declared not to be used.
   if (state.total_prefix_bytes() > max_prefixes_ or
       state.has_duplicated_prefixes() or
+      state.has_unused_prefixes(used_prefixes_) or
       not state.has_ordered_prefixes()) {
     goto again;
   }
diff --git a/src/main.cpp b/src/main.cpp
index 78a0415..1b11d95 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -140,6 +140,12 @@ struct options {
 
   // After how many instructions do we stop. Zero means don't stop.
   size_t stop_after = 0;
+
+  // What prefixes to use. Zero means no prefixes are valid to use. Each bit of the number selects one prefix group.
+  size_t used_prefixes = 0xFF;
+
+  // What prefixes to detect. Zero means no bytes are treated as prefixes. Each bit of the number selects one prefix group.
+  size_t detect_prefixes = 0xFF;
 };
 
 // This will modify cmdline.
@@ -158,6 +164,10 @@ static options parse_and_destroy_cmdline(char *cmdline)
 
     if (strcmp(key, "prefixes") == 0)
       res.prefixes = atoi(value);
+    if (strcmp(key, "used_prefixes") == 0)
+      res.used_prefixes = atoi(value);
+    if (strcmp(key, "detect_prefixes") == 0)
+      res.detect_prefixes = atoi(value);
     if (strcmp(key, "stop_after") == 0)
       res.stop_after = atoi(value);
   }
@@ -182,7 +192,7 @@ void start(cpu_features const &features, char *cmdline)
   if (options.stop_after)
     format(">>> Stopping after ", options.stop_after, " execution attemps.\n");
 
-  search_engine search { options.prefixes };
+  search_engine search { options.prefixes, options.used_prefixes, options.detect_prefixes };
   execution_attempt last_attempt;
 
   do {
diff --git a/src/x86_32/arch.cpp b/src/x86_32/arch.cpp
index 27cac9a..342c0ec 100644
--- a/src/x86_32/arch.cpp
+++ b/src/x86_32/arch.cpp
@@ -51,15 +51,52 @@ static bool is_aligned(uint64_t v, int order)
 
 static void setup_paging()
 {
+  bool pse_supported = has_pse(); // Can 4MB pages be used?
+  bool wp_supported = has_wp();   // Can CR0.WP be set?
   uintptr_t istart = reinterpret_cast<uintptr_t>(_image_start);
   uintptr_t iend = reinterpret_cast<uintptr_t>(_image_end);
+  uintptr_t page_tables_start = iend+(1U<<22); // Page tables (non-PSE only) are placed 4MB behind the image end.
+  //For now just store it there if required (assuming enough memory is installed)!
+  if (page_tables_start & 0xFFF) // Page tables must start on a 4KB boundary.
+  {
+    page_tables_start = (page_tables_start + 0xFFF) & ~0xFFF; // Round up to the next 4KB boundary.
+  }
 
   assert(is_aligned(istart, 22), "Image needs to start on large page boundary");
 
+  uintptr_t tablepos = page_tables_start; // Next page table to hand out to a page directory entry.
+
   // Map our binary 1:1
   for (uintptr_t c = istart; c <= iend; c += (1U << 22)) {
-    uintptr_t idx = c >> 22;
-    pdt[idx] = c | PTE_P | PTE_W | PTE_PS;
+    uintptr_t idx;
+    uintptr_t p; // Physical address (plus flags) stored in the page directory entry.
+    idx = c >> 22; // Index into the page directory.
+    if (pse_supported) // PSE supported? Map a 4MB page directly.
+    {
+      p = c | PTE_PS; // Identity-mapped large page.
+    }
+    else // Otherwise point the page directory entry at a 4KB page table.
+    {
+      p = tablepos; // Page table backing this 4MB region.
+      tablepos += (1 << 12); // Each page table occupies 4KB.
+    }
+    pdt[idx] = PTE_P | PTE_W | p; // Map PDE to page table or large page.
+  }
+  // Map additional page tables, if required (non-PSE systems).
+  if (!pse_supported) // 4KB page tables are required?
+  {
+    tablepos = page_tables_start; // Fill the tables in the same order the page directory entries reference them.
+    for (uintptr_t c = istart; c <= iend; c += (1U << 22)) // Walk the image range again, one page table per 4MB.
+    {
+      uintptr_t m = c; // Physical address the next PTE maps.
+      uint32_t* t = reinterpret_cast<uint32_t*>(tablepos); // Backing page table in physical memory.
+      for (uintptr_t d = 0; d < 1024;) // A page table holds exactly 1024 entries (1024 * 4KB = 4MB).
+      {
+        t[d++] = m | PTE_P | PTE_W; // 4KB PTE
+        m += 4096; // Mapped 4KB of memory.
+      }
+      tablepos += 4096; // Next page table to fill.
+    }
   }
 
   // Map user page
@@ -69,9 +106,12 @@ static void setup_paging()
   pdt[bit_select(32, 22, up)] = reinterpret_cast<uintptr_t>(user_pt) | PTE_U | PTE_P;
   user_pt[bit_select(22, 12, up)] = reinterpret_cast<uintptr_t>(get_user_page_backing()) | PTE_U | PTE_P;
 
-  set_cr4(get_cr4() | CR4_PSE | CR4_SMEP);
+  if (pse_supported || has_smep()) // Only touch CR4 when at least one of the features exists.
+  {
+    set_cr4(get_cr4() | (pse_supported ? CR4_PSE : 0) | (has_smep() ? CR4_SMEP : 0));
+  }
   set_cr3((uintptr_t)pdt);
-  set_cr0(get_cr0() | CR0_PG | CR0_WP);
+  set_cr0(get_cr0() | CR0_PG | (wp_supported?CR0_WP:0));
 }
 
 static void setup_gdt()