diff --git a/plugins/CMakeLists.txt b/plugins/CMakeLists.txt
index 3f5a2cd..099dd76 100644
--- a/plugins/CMakeLists.txt
+++ b/plugins/CMakeLists.txt
@@ -68,6 +68,7 @@ add_subdirectory(syscall_tracer)
 add_subdirectory(bbstats)
 add_subdirectory(callstack)
 add_subdirectory(apicall_tracer)
+add_subdirectory(memory_hash)
 add_subdirectory(memory_regions)
 add_subdirectory(pmemdump)
 #add_subdirectory(volatility)
diff --git a/plugins/memory_hash/CMakeLists.txt b/plugins/memory_hash/CMakeLists.txt
new file mode 100644
index 0000000..4f32ce1
--- /dev/null
+++ b/plugins/memory_hash/CMakeLists.txt
@@ -0,0 +1,22 @@
+set(PANDA_PLUGIN_NAME "memory_hash")
+set(PLUGIN_TARGET "panda_${PANDA_PLUGIN_NAME}")
+
+# Set flags, build and link the actual plugin (one target each for i386 and x86_64)
+Include_directories(${CMAKE_CURRENT_BINARY_DIR})
+
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -std=c++11")
+set(SRC_FILES ${PANDA_PLUGIN_NAME}.cpp pr61hash.cpp pr61hash_test.cpp)  # NOTE(review): the test file is built in, but its only call site (test_pr61) is commented out
+
+set(LINK_LIBS_I386 ${LINK_LIBS})  # NOTE(review): this and the next line are overwritten by the set() calls that follow
+set(LINK_LIBS_X86_64 ${LINK_LIBS})
+
+set(LINK_LIBS_I386 ${LINK_LIBS} panda_ipanda-i386 panda_apicall_tracer-i386)
+set(LINK_LIBS_X86_64 ${LINK_LIBS} panda_ipanda-x86_64 panda_apicall_tracer-x86_64)
+
+set(TARGET_DEPS_I386 panda_ipanda-i386)  # only referenced by the commented-out add_dependencies() calls below
+set(TARGET_DEPS_X86_64 panda_ipanda-x86_64)
+
+add_i386_plugin(${PLUGIN_TARGET} SRC_FILES LINK_LIBS_I386)  # variable names are passed, not their values; presumably the helper expands them (confirm)
+add_x86_64_plugin(${PLUGIN_TARGET} SRC_FILES LINK_LIBS_X86_64)
+#add_dependencies(${PLUGIN_TARGET}-i386 ${TARGET_DEPS_I386})
+#add_dependencies(${PLUGIN_TARGET}-x86_64 ${TARGET_DEPS_X86_64})
diff --git a/plugins/memory_hash/memory_hash.cpp b/plugins/memory_hash/memory_hash.cpp
new file mode 100644
index 0000000..7ceb86b
--- /dev/null
+++ b/plugins/memory_hash/memory_hash.cpp
@@ -0,0 +1,366 @@
+/*
+ * Memory Hash -- Page hashing PANDA plugin. Tracks a rolling-hash history for
+ * every (ASID, virtual page) written by allowed user-mode code and dumps it
+ * as JSON when the plugin is unloaded.
+ */
+
+// This needs to be defined before anything is included in order to get
+// the PRIx64 macro
+#define __STDC_FORMAT_MACROS
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include "panda/plugin.h"
+#include "panda/common.h"
+#include "exec/cpu-defs.h"
+
+#include "ipanda/ipanda.h"
+#include "ipanda/manager.h"
+#include "ipanda/types.h"
+
+#include "apicall_tracer/trace_filter.h"
+
+#include "pr61hash.h"
+//#include "pr61hash_test.h"
+
+#define PREFIX "[memory_hash] "
+
+// These need to be extern "C" so that the ABI is compatible with
+// QEMU/PANDA, which is written in C
+extern "C" {
+    bool init_plugin(void*);
+    void uninit_plugin(void*);
+}
+
+
+// Type aliases for guest addresses, address-space ids and page hashes
+using physical_t = uint64_t;
+using virtual_t = uint64_t;
+using asid_t = uint64_t;
+using pr61hash_t = uint64_t;
+using page_key_t = std::tuple;
+#define ASID 0  // position of the asid in a page key (keys are built as make_tuple(asid, vpage_id))
+#define PAGE 1  // position of the page id in a page key
+
+
+// ### Globals
+bool s_memhash_initialized = false;  // set by init_memhash once init_ipanda has succeeded
+
+// ### Memory Hash Plugin Variables
+static FILE *output_fp;  // JSON output file, opened in init_plugin
+static uint64_t phys_write_count = 0;  // number of hashes committed by the after-write hook
+static std::map> pages_written;  // committed hash history per page key
+static std::map hash_uncommitited;  // hash staged by the before-write hook, per page key
+static std::map hash_freq; // frequency of each hash that appears
+
+// filter variables
+static std::shared_ptr os_manager;
+static auto tracefilter = std::shared_ptr();
+static bool allowed = false;  // result of the last allow-list check (see mh_process_change)
+
+
+// Serialise pages_written to output_fp as a single JSON object of the form
+// {"pages": [{"asid": ..., "page_id": ..., "hashes": [...]}, ...]}. All values
+// are emitted as strings: asid in decimal, page_id and hashes as zero-padded,
+// upper-case hex.
+void write_json()
+{
+    rapidjson::Document document;
+    document.SetObject();
+    rapidjson::Document::AllocatorType& allocator = document.GetAllocator();
+    std::stringstream ss;
+
+    // Handle Pages
+    rapidjson::Value pages_writtenj(rapidjson::kArrayType);
+    for (const auto& kv : pages_written) {
+        // asid
+        ss << std::dec << std::get(kv.first);
+        std::string asid = ss.str();
+        ss.str("");
+        ss.clear();
+
+        // page_id
+        ss << std::hex << std::uppercase << std::setw(10) << std::setfill('0') << std::get(kv.first);
+        std::string page_id = ss.str();
+        ss.str("");
+        ss.clear();
+
+        // hashes
+        rapidjson::Value deltasj(rapidjson::kArrayType);
+        for (const auto& delta : kv.second) {
+            ss << std::hex << std::uppercase << std::setw(16) << std::setfill('0') << delta;
+            std::string hash = ss.str();
+            ss.str("");
+            ss.clear();
+
+            rapidjson::Value value(hash.c_str(), allocator);
+            deltasj.PushBack(value, allocator);
+        }
+
+        // element
+        rapidjson::Value ele(rapidjson::kObjectType);
+        rapidjson::Value asidj(asid.c_str(), allocator);
+        rapidjson::Value page_idj(page_id.c_str(), allocator);
+        ele.AddMember("asid", asidj, allocator);
+        ele.AddMember("page_id", page_idj, allocator);
+        ele.AddMember("hashes", deltasj, allocator);
+
+        pages_writtenj.PushBack(ele, allocator);
+    }
+    document.AddMember("pages", pages_writtenj, allocator);
+
+    // Output Json
+    char writeBuffer[65536]; // Buffer for writing, recommend size by docs
+    rapidjson::FileWriteStream os(output_fp, writeBuffer, sizeof(writeBuffer));
+    rapidjson::Writer writer(os);
+    document.Accept(writer);
+}
+
+// Ask the trace filter whether the process currently running on `env` is traced.
+// Dereferences os_manager, so it must only run after init_memhash has succeeded.
+bool mh_check_allowlist(CPUState *env) {
+    // This check is very slow and expensive
+    ipanda_types::Process current_process;
+    os_manager->get_current_process(env, current_process);
+    return tracefilter->quickCheck(current_process.pid, current_process.asid);
+}
+
+
+// PANDA callback fired before a guest virtual-memory write. Stages (but does not
+// commit) the hash the touched page will have once `buf` lands at `vaddr`, by
+// applying a byte-wise delta to the page's last committed hash.
+// NOTE(review): a write that crosses a page boundary is clipped to the first
+// page; the bytes landing on the following page are not hashed here.
+void mh_virt_mem_before_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf)
+{
+    if (!allowed || panda_in_kernel(env)) return;
+
+    physical_t vpage_id = (vaddr & ~(0xFFF));
+    physical_t vpage_offset = (vaddr & (0xFFF));
+    uint8_t buffer[PAGE_SIZE];
+    pr61hash_t hash_new = (pr61hash_t)-1;
+    page_key_t current_page_key;
+
+    // "before" hook is called up to 3 times before successful page read,
+    // first fault for TLB, second fault for process PT.
+    if (panda_virtual_memory_read(env, vpage_id, buffer, PAGE_SIZE) == -1) return;
+    // successful page read by this point.
+
+    asid_t asid = panda_current_asid(env);
+    current_page_key = std::make_tuple(asid, vpage_id);
+
+    // if first encounter with page
+    if (pages_written.find(current_page_key) == pages_written.end()) {
+        // Calculate the full hash and update structures
+        hash_new = full_poly_hash(buffer);
+        pages_written[current_page_key] = { hash_new };
+    } else {
+        // There's a chance something changes in the page between committed hashes.
+        // Check by calculating the full hash and comparing to last hash for the page.
+        pr61hash_t hash_curr = full_poly_hash(buffer);
+        pr61hash_t hash_prev = pages_written[current_page_key].back();
+        //std::cout << PREFIX "before PAGE unchanged check: " << (hash_curr != hash_prev)
+        //    << std::hex << std::uppercase << std::setw(16) << std::setfill('0')
+        //    << " HASH_NEW: " << hash_curr
+        //    << " HASH_OLD: " << hash_prev
+        //    << std::dec << std::setw(0) << std::endl;
+        if (hash_curr != hash_prev) {
+            //std::cerr << PREFIX "WARNING: page update went undetected, syncing..." << std::endl;
+            pages_written[current_page_key].push_back(hash_curr);
+            hash_freq[hash_curr]++;
+        }
+    }
+
+    // adjust page size if needed
+    if (size+vpage_offset > PAGE_SIZE) {
+        size = PAGE_SIZE - vpage_offset;
+    }
+
+    pr61hash_t hash_old = pages_written[current_page_key].back();
+    hash_new = apply_delta(hash_old, &buffer[vpage_offset], buf, size, vpage_offset);
+    hash_uncommitited[current_page_key] = hash_new;
+    hash_freq[hash_new]++;
+
+    //std::cout << PREFIX "DEBUG: before"
+    //    << std::hex << std::uppercase << std::setw(16) << std::setfill('0')
+    //    << " ASID: 0x" << asid
+    //    << " PAGE: 0x" << vpage_id
+    //    << " OFFSET: 0x" << vpage_offset
+    //    << " SIZE: 0x" << size
+    //    << " MULTIPAGE: " << (vpage_offset + size > PAGE_SIZE)
+    //    << " HASH: " << hash_new
+    //    << std::dec << std::setw(0) << std::endl;
+}
+
+// PANDA callback fired after a guest virtual-memory write. Commits the hash
+// staged by mh_virt_mem_before_write to the page's history, unless it equals
+// the last committed hash.
+void mh_virt_mem_after_write(CPUState *env, target_ptr_t pc, target_ptr_t vaddr, size_t size, uint8_t *buf)
+{
+    if (!allowed || panda_in_kernel(env)) return;
+
+    asid_t asid = panda_current_asid(env);
+    physical_t vpage_id = (vaddr & ~(0xFFF));
+    //physical_t vpage_offset = (vaddr & (0xFFF));
+    uint8_t buffer[PAGE_SIZE];
+
+    if (panda_virtual_memory_read(env, vpage_id, buffer, PAGE_SIZE) == -1) return;
+
+    page_key_t current_page_key = std::make_tuple(asid, vpage_id);
+    // Only commit what the before-write hook actually staged. Looking the key up
+    // with operator[] would default-insert an empty history and then call back()
+    // on it whenever the before hook bailed out for this page.
+    auto pending = hash_uncommitited.find(current_page_key);
+    auto committed = pages_written.find(current_page_key);
+    if (pending == hash_uncommitited.end() || committed == pages_written.end()) return;
+    pr61hash_t hash_new = pending->second;
+
+    pr61hash_t hash_prev = committed->second.back();
+    if (hash_prev == hash_new) return;
+
+    //pr61hash_t hash_a = full_poly_hash(buffer);
+    //std::cout << PREFIX "after PAGE unchanged check: " << (hash_a != hash_new)
+    //    << std::hex << std::uppercase << std::setw(16) << std::setfill('0')
+    //    << " HASH_NEW: " << hash_a
+    //    << " HASH_OLD: " << hash_new
+    //    << std::dec << std::setw(0) << std::endl;
+    //assert(hash_a == hash_new);
+
+    //commit hash
+    committed->second.push_back(hash_new);
+    phys_write_count++;
+
+    //std::cout << PREFIX << "DEBUG: after"
+    //    << std::hex << std::uppercase << std::setw(16) << std::setfill('0')
+    //    << " ASID: 0x" << asid
+    //    << " PAGE: 0x" << vpage_id
+    //    << " OFFSET: 0x" << vpage_offset
+    //    << " SIZE: 0x" << size
+    //    << " MULTIPAGE: " << (vpage_offset + size > PAGE_SIZE)
+    //    << " COMMITTED: " << hash_new
+    //    << std::dec << std::setw(0) << std::endl;
+}
+
+
+// PANDA ASID-changed callback: refresh `allowed` for the process that is now
+// current. The allow-list check is skipped (and writes stay untraced) until
+// init_memhash has succeeded, because it dereferences os_manager.
+bool mh_process_change(CPUState* env, target_ulong oldval, target_ulong newval)
+{
+    allowed = s_memhash_initialized && mh_check_allowlist(env);
+    return false;
+}
+
+
+// After-loadvm hook: bring up the ipanda introspection layer (os_manager is
+// handed to init_ipanda) and mark the plugin as initialised on success.
+void init_memhash(CPUState* env)
+{
+    std::cout << "initializing memhash" << std::endl;
+    // ipanda must load on/after first instruction
+    if (!init_ipanda(env, os_manager)) {
+        fprintf(stderr, "Could not initialize the introspection library.\n");
+        return;
+    }
+
+    std::cout << "initialized memhash" << std::endl;
+    //allowed = mh_check_allowlist(env);
+    s_memhash_initialized = true;
+}
+
+
+// Plugin entry point. Reads the mandatory `filter` and `output` arguments,
+// builds the trace filter, opens the JSON output file and registers the
+// callbacks. Returns false when an argument is missing or the output file
+// cannot be opened, true otherwise.
+bool init_plugin(void* self)
+{
+    panda_cb pcb;
+    panda_arg_list* memhash_args = panda_get_args("memory_hash");
+
+    // --panda-arg memory_hash:filter=filter.json
+    // --panda-arg memory_hash:output=output.json
+    // Both values are copied up front so the argument list can be released
+    // before any of the early returns below.
+    // NOTE(review): the strdup'd copies are never freed; kept because it is
+    // not visible here whether TraceFilter retains the pointer it is given.
+    const char* filter_file = strdup(panda_parse_string(memhash_args, "filter", ""));
+    const char* output_file = strdup(panda_parse_string(memhash_args, "output", ""));
+    panda_free_args(memhash_args);
+
+    if (filter_file[0] == '\0') {
+        std::cerr << "ERROR: filter not provided" << std::endl;
+        return false;
+    }
+    if (output_file[0] == '\0') {
+        std::cerr << "ERROR: output not provided" << std::endl;
+        return false;
+    }
+    tracefilter.reset(new TraceFilter(filter_file));
+
+    output_fp = fopen(output_file, "w");
+    if (output_fp == nullptr) {
+        std::cerr << "ERROR: could not open output file " << output_file << std::endl;
+        return false;
+    }
+
+    // Test Hash lib
+    //test_pr61();
+
+    // enable memory callbacks, turned off by default
+    panda_enable_memcb();
+
+    // Post vm load initialization
+    pcb.after_loadvm = (reinterpret_cast(init_memhash));
+    panda_register_callback(self, PANDA_CB_AFTER_LOADVM, pcb);
+
+    pcb.virt_mem_before_write = mh_virt_mem_before_write;
+    panda_register_callback(self, PANDA_CB_VIRT_MEM_BEFORE_WRITE, pcb);
+
+    pcb.virt_mem_after_write = mh_virt_mem_after_write;
+    panda_register_callback(self, PANDA_CB_VIRT_MEM_AFTER_WRITE, pcb);
+
+    // Track process changes to optimize checks for target threads
+    pcb.asid_changed = mh_process_change;
+    panda_register_callback(self, PANDA_CB_ASID_CHANGED, pcb);
+
+    std::cout << "loaded MEM_HASH" << std::endl;
+    return true;
+}
+
+
+// Plugin exit point: print summary statistics (including every hash counted
+// more than 100 times), then write the JSON report and close the output file.
+void uninit_plugin(void* self)
+{
+    std::cout << PREFIX "unloading..." << std::endl;
+    std::cout << PREFIX "individual page writes: " << phys_write_count << std::endl;
+    std::cout << PREFIX "unique pages written to: " << pages_written.size() << std::endl;
+    std::cout << PREFIX "unique hashes: " << hash_freq.size() << std::endl;
+
+    std::cout << std::hex;
+    for (const auto& kv : hash_freq) {
+        if (kv.second > 100) {
+            std::cout << kv.first << " - " << kv.second << std::endl;
+        }
+    }
+    std::cout << std::dec;
+
+    std::cout << PREFIX "writing json output..." << std::endl;
+    if (output_fp != nullptr) {  // stays null when init_plugin failed before or at fopen
+        write_json();
+        fclose(output_fp);
+        output_fp = nullptr;
+    }
+    std::cout << PREFIX "done." << std::endl;
+}
diff --git a/plugins/memory_hash/pr61hash.cpp b/plugins/memory_hash/pr61hash.cpp
new file mode 100644
index 0000000..57d87cd
--- /dev/null
+++ b/plugins/memory_hash/pr61hash.cpp
@@ -0,0 +1,63 @@
+#include
+#include
+#include
+#include
+
+#include "pr61hash.h"
+
+#define PREFIX "[pr61_hash]"
+
+// ### Polynomial Rolling 61-bit Hash -- unofficially calling pr61
+
+constexpr std::array poly_hash_powers()  // NOTE(review): a loop in a constexpr function needs C++14+, but CMakeLists.txt passes -std=c++11 -- confirm the effective standard
+// Pre-compute the per-position multipliers used by the hash (entry i is applied to byte i of a page)
+{
+    std::array _powers = {1,};
+    for (size_t i = 1; i < PAGE_SIZE; i++) {
+        _powers[i] = (_powers[i-1] * P) % MOD;  // NOTE(review): the 64-bit product can wrap before % MOD (mulmod() below avoids that); unchanged here because fixing it alters every hash value
+    }
+    return _powers;
+}
+
+static std::array powers = poly_hash_powers();  // multiplier table shared by the hash functions below
+
+inline uint64_t mulmod(uint64_t a, uint64_t b)
+// (a * b) % MOD, with the product formed in 128 bits so it cannot overflow
+{
+    return (uint64_t)(((__uint128_t)a*b) % MOD);
+}
+
+uint64_t full_poly_hash(uint8_t* page)
+// Hash a whole page: the sum of page[i] * powers[i] over all PAGE_SIZE bytes, reduced mod MOD
+{
+    uint64_t hash = 0;
+    for (size_t i = 0; i < PAGE_SIZE; i++) {
+        //std::cout << powers[i] << std::endl;
+        hash = (hash + mulmod(page[i], powers[i])) % MOD;
+    }
+    return hash;
+}
+
+uint64_t delta_poly_hash(uint64_t hash, size_t idx, uint8_t oldv, uint8_t newv)
+// Return `hash` updated for the byte at page index idx changing from oldv to newv; an out-of-range idx logs an error and returns hash unchanged
+{
+    if (idx >= PAGE_SIZE) {
+        std::cerr << PREFIX "ERROR index issue" << std::endl;
+        return hash;
+    }
+
+    uint64_t newd = mulmod(newv, powers[idx]);
+    uint64_t oldd = mulmod(oldv, powers[idx]);
+    return (hash + newd - oldd + MOD) % MOD;  // + MOD keeps the true value positive when oldd > newd
+}
+
+uint64_t apply_delta(uint64_t hash, uint8_t *old_buffer, uint8_t *new_buffer, size_t size, size_t offset)
+// Apply `size` byte changes starting at page index `offset`. NOTE: both buffers are indexed from 0, i.e. they must point at the first changed byte, not at the start of the page
+{
+    uint64_t curr_h = hash;
+    for (size_t i = 0; i < size; i++) {
+        curr_h = delta_poly_hash(curr_h, offset+i, old_buffer[i], new_buffer[i]);
+    }
+    return curr_h;
+}
+
diff --git a/plugins/memory_hash/pr61hash.h b/plugins/memory_hash/pr61hash.h
new file mode 100644
index 0000000..1eccfa9
--- /dev/null
+++ b/plugins/memory_hash/pr61hash.h
@@ -0,0 +1,13 @@
+#ifndef POLY_HASH_H
+#define POLY_HASH_H
+
+constexpr size_t PAGE_SIZE = 4096; // x86_64 - 4KiB is default
+constexpr uint64_t MOD = (1ULL << 61) - 1; // modulus every hash value is reduced by
+constexpr uint64_t P = 257; // base of the polynomial
+
+void init_poly_hash_powers(); // NOTE(review): declared here but no definition is visible in this patch
+uint64_t full_poly_hash(uint8_t *page);
+uint64_t delta_poly_hash(uint64_t h, size_t i, uint8_t oldv, uint8_t newv);
+uint64_t apply_delta(uint64_t h, uint8_t *old_buffer, uint8_t *new_buffer, size_t size, size_t offset);
+
+#endif
diff --git a/plugins/memory_hash/pr61hash_test.cpp b/plugins/memory_hash/pr61hash_test.cpp
new file mode 100644
index 0000000..7d012f2
--- /dev/null
+++ b/plugins/memory_hash/pr61hash_test.cpp
@@ -0,0 +1,79 @@
+#include
+#include
+#include
+#include
+
+#include "pr61hash.h"
+
+const uint64_t PAGE_HASH = 0x1b1d8d0b72aebff6; // expected hash of the page whose byte i holds i % 256
+
+// Fill a page front-to-back and another back-to-front with single-byte deltas; both must reach PAGE_HASH.
+void test_fw_bw_sweep()
+{
+    uint8_t page1[PAGE_SIZE] = {};
+    uint8_t page2[PAGE_SIZE] = {};
+
+    uint64_t page1_hash = full_poly_hash(page1);
+    uint64_t page2_hash = full_poly_hash(page2);
+
+    std::cout << std::hex;
+    for (size_t i = 0; i < PAGE_SIZE; i++) {
+        page1[i] = i % 256;
+        page1_hash = delta_poly_hash(page1_hash, i, 0, i%256);
+        //std::cout << "page1 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page1_hash << std::endl;
+    }
+    std::cout << "full - ___ - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page1) << std::endl;
+    assert(page1_hash == PAGE_HASH);
+
+    for (int i = PAGE_SIZE-1; i >= 0; i--) {
+        page2[i] = i % 256;
+        page2_hash = delta_poly_hash(page2_hash, i, 0, i%256);
+        //std::cout << "page2 - " << i << " - 0x"<< std::setw(16) << std::setfill('0') << page2_hash << std::endl;
+    }
+    std::cout << "full - ___ - 0x" << std::setw(16) << std::setfill('0') << full_poly_hash(page2) << std::endl;
+    assert(full_poly_hash(page2) == full_poly_hash(page1));
+    assert(page2_hash == PAGE_HASH);
+    std::cout << std::dec;
+    std::cout << "test_fw_bw_sweep Success!" << std::endl;
+}
+
+// apply_delta() over a changed span must give the same hash as re-hashing the whole page.
+void test_apply_delta() {
+    uint8_t page[PAGE_SIZE] = {};
+    uint64_t hash = 0; // Starting hash for empty page
+    uint64_t hash_new;
+
+    const size_t sized = 8;
+    uint8_t delta[sized] = {'A', 'B', 'C', 'D', 0x32, 0xFF, 0x20, 0x90};
+    uint8_t buffer1[PAGE_SIZE] = {};
+    uint8_t buffer2[PAGE_SIZE] = {};
+
+    // Test 1: apply_delta indexes its buffers from 0, so pass pointers to the changed span, not the page base
+    uint16_t offset = 1024;
+    memcpy(&buffer1[offset], delta, sized);
+    hash_new = apply_delta(hash, &page[offset], &buffer1[offset], sized, offset);
+    memcpy(&page[offset], delta, sized);
+    hash = full_poly_hash(page);
+    std::cout << "delta: " << std::hex << std::setw(16) << std::setfill('0') << hash_new<< std::endl;
+    std::cout << "full: " << std::hex << std::setw(16) << std::setfill('0') << hash << std::endl;
+    assert(hash == hash_new);
+
+    // Test 2: a second span on the already-modified page, starting from the hash produced by Test 1
+    offset = 2048;
+    memcpy(&buffer2[offset], delta, sized);
+    hash_new = apply_delta(hash, &page[offset], &buffer2[offset], sized, offset);
+    memcpy(&page[offset], delta, sized);
+    hash = full_poly_hash(page);
+    std::cout << "delta: " << std::hex << std::setw(16) << std::setfill('0') << hash_new<< std::endl;
+    std::cout << "full: " << std::hex << std::setw(16) << std::setfill('0') << hash << std::endl;
+    assert(hash == hash_new);
+
+    std::cout << "test_apply_delta Success!" << std::endl;
+}
+
+// Test suite entry point: runs every pr61 self-test; each one aborts through assert on failure
+void test_pr61()
+{
+    test_apply_delta();
+    test_fw_bw_sweep();
+}
diff --git a/plugins/memory_hash/pr61hash_test.h b/plugins/memory_hash/pr61hash_test.h
new file mode 100644
index 0000000..b6a5e42
--- /dev/null
+++ b/plugins/memory_hash/pr61hash_test.h
@@ -0,0 +1,5 @@
+#ifndef POLY_HASH_TEST_H
+#define POLY_HASH_TEST_H
+
+void test_pr61();
+#endif