From c1afe65dfb15519cea4a383c526a0c738631217a Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Wed, 27 Sep 2023 22:12:20 -0400 Subject: [PATCH 1/4] Add debug_infos array to pe module. Fun fact: debug information is actually an array of structures. Historically, YARA has stopped parsing after finding the first entry with a PDB path (with some other restrictions around the type of debug entry this is). However, each entry can have different information (including pdb paths), so let's add an array of debug_infos structures which contain timestamp, type and pdb path. Just in testing I discovered legit binaries that have different PDB paths in them, which is actually kind of interesting. --- libyara/modules/pe/pe.c | 53 +++++++++++++++++++++++++++++++++++++---- tests/test-pe.c | 28 ++++++++++++++++++++-- 2 files changed, 74 insertions(+), 7 deletions(-) diff --git a/libyara/modules/pe/pe.c b/libyara/modules/pe/pe.c index a9c96d9c36..e9210a7208 100644 --- a/libyara/modules/pe/pe.c +++ b/libyara/modules/pe/pe.c @@ -298,6 +298,9 @@ static void pe_parse_debug_directory(PE* pe) PIMAGE_DEBUG_DIRECTORY debug_dir; int64_t debug_dir_offset; int i, dcount; + // Use pdb_done to determine if we have already parsed the first available PDB + // path. + int parsed_dirs = 0, pdb_done = 0; size_t pdb_path_len; char* pdb_path = NULL; @@ -332,6 +335,27 @@ static void pe_parse_debug_directory(PE* pe) if (!struct_fits_in_pe(pe, debug_dir, IMAGE_DEBUG_DIRECTORY)) break; + // Intentionally pulling out timestamps even if it isn't CODEVIEW as it is + // still useful to know. + yr_set_integer( + yr_le32toh(debug_dir->TimeDateStamp), + pe->object, + "debug_infos[%i].timestamp", + i); + + // Intentionally pulling out timestamps even if it isn't CODEVIEW as it is + // still useful to know. + yr_set_integer( + yr_le32toh(debug_dir->Type), + pe->object, + "debug_infos[%i].type", + i); + + // Increment parsed_dirs here because we have filled in part of the + // structure at this index in the array, even if we can only populate other + // information from CODEVIEW debug entries. + parsed_dirs++; + if (yr_le32toh(debug_dir->Type) != IMAGE_DEBUG_TYPE_CODEVIEW) continue; @@ -387,11 +411,20 @@ static void pe_parse_debug_directory(PE* pe) if (pdb_path_len > 0 && pdb_path_len < MAX_PATH) { - yr_set_sized_string(pdb_path, pdb_path_len, pe->object, "pdb_path"); - break; + // Earlier versions of YARA only parsed the first debug entry with a PDB + // path. We have to maintain this for backwards compatability reasons. + if (!pdb_done) + { + yr_set_sized_string(pdb_path, pdb_path_len, pe->object, "pdb_path"); + } + // We always parse all PDB paths for debug_infos array. + yr_set_sized_string( + pdb_path, pdb_path_len, pe->object, "debug_infos[%i].pdb_path", i); } } } + + yr_set_integer(parsed_dirs, pe->object, "number_of_debug_infos"); } // Return a pointer to the resource directory string or NULL. @@ -1656,7 +1689,10 @@ static void pe_parse_exports(PE* pe) ordinal_base + i, pe->object, "export_details[%i].ordinal", exp_sz); yr_set_integer( - yr_le32toh(function_addrs[i]), pe->object, "export_details[%i].rva", exp_sz); + yr_le32toh(function_addrs[i]), + pe->object, + "export_details[%i].rva", + exp_sz); // Don't check for a failure here since some packers make this an invalid // value. @@ -1758,8 +1794,8 @@ void _process_authenticode( const Authenticode* authenticode = auth_array->signatures[i]; signature_valid |= authenticode->verify_flags == AUTHENTICODE_VFY_VALID - ? true - : false; + ? true + : false; yr_set_integer( signature_valid, pe->object, "signatures[%i].verified", *sig_count); @@ -3795,6 +3831,13 @@ begin_declarations declare_function("is_32bit", "", "i", is_32bit); declare_function("is_64bit", "", "i", is_64bit); + declare_integer("number_of_debug_infos"); + begin_struct_array("debug_infos") + declare_integer("type"); + declare_integer("timestamp"); + declare_string("pdb_path"); + end_struct_array("debug_infos"); + declare_integer("number_of_imports"); declare_integer("number_of_imported_functions"); declare_integer("number_of_delayed_imports"); diff --git a/tests/test-pe.c b/tests/test-pe.c index 2b0c4ceb42..51763c63ea 100644 --- a/tests/test-pe.c +++ b/tests/test-pe.c @@ -255,6 +255,26 @@ int main(int argc, char** argv) }", "tests/data/tiny"); + // Be sure to check pdb_path (not in the struct) to make sure we maintain + // historical parsing behavior. + assert_true_rule_file( + "import \"pe\" \ + rule test { \ + condition: \ + pe.number_of_debug_infos == 3 and \ + pe.debug_infos[0].type == 2 and \ + pe.debug_infos[0].timestamp == 1827812126 and \ + pe.debug_infos[0].pdb_path == \"mtxex.pdb\" and \ + pe.debug_infos[1].type == 13 and \ + pe.debug_infos[1].timestamp == 1827812126 and \ + not defined pe.debug_infos[1].pdb_path and \ + pe.debug_infos[2].type == 16 and \ + pe.debug_infos[2].timestamp == 1827812126 and \ + not defined pe.debug_infos[2].pdb_path and \ + pe.pdb_path == \"mtxex.pdb\" \ + }", + "tests/data/mtxex.dll"); + #if defined(HAVE_LIBCRYPTO) || defined(HAVE_WINCRYPT_H) || \ defined(HAVE_COMMONCRYPTO_COMMONCRYPTO_H) @@ -475,8 +495,12 @@ int main(int argc, char** argv) /* * mtxex.dll is * 23e72ce7e9cdbc80c0095484ebeb02f56b21e48fd67044e69e7a2ae76db631e5, which was - * taken from a Windows 10 install. The details of which are: export_timestamp - * = 1827812126 dll_name = "mtxex.dll" number_of_exports = 4 export_details + * taken from a Windows 10 install. The details of which are: + * + * export_timestamp = 1827812126 + * dll_name = "mtxex.dll" + * number_of_exports = 4 + * export_details * [0] * offset = 1072 * name = "DllGetClassObject" From ee6750785991133a10b31c6e2ca5ad8b1dae9806 Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Fri, 29 Sep 2023 15:34:33 -0400 Subject: [PATCH 2/4] Implement load config parsing. Specifically, I'm only parsing out the timestamp field of the structure. While here, rename the newly created "debug_infos" array to "debug_details". --- libyara/include/yara/pe.h | 69 ++++++++++++++++++++++++++++++ libyara/modules/pe/pe.c | 88 ++++++++++++++++++++++++++++++++------- 2 files changed, 142 insertions(+), 15 deletions(-) diff --git a/libyara/include/yara/pe.h b/libyara/include/yara/pe.h index 096e75afb0..97d26a659e 100644 --- a/libyara/include/yara/pe.h +++ b/libyara/include/yara/pe.h @@ -566,6 +566,75 @@ typedef struct _IMAGE_RESOURCE_DIRECTORY WORD NumberOfIdEntries; } IMAGE_RESOURCE_DIRECTORY, *PIMAGE_RESOURCE_DIRECTORY; +typedef struct _IMAGE_LOAD_CONFIG_DIRECTORY32 +{ + DWORD Characteristics; + DWORD TimeDateStamp; + WORD MajorVersion; + WORD MinorVersion; + DWORD GlobalFlagsClear; + DWORD GlobalFlagsSet; + DWORD CriticalSectionDefaultTimeout; + DWORD DeCommitFreeBlockThreshold; + DWORD DeCommitTotalFreeThreshold; + DWORD LockPrefixTable; + DWORD MaximumAllocationSize; + DWORD VirtualMemoryThreshold; + DWORD ProcessAffinityMask; + DWORD ProcessHeapFlags; + WORD CSDVersion; + WORD Reserved; + DWORD EditList; + DWORD SecurityCookie; + DWORD SEHandlerTable; + DWORD SEHandlerCount; + DWORD GuardCFCheckFunctionPointer; + DWORD GuardCFDispatchFunctionPointer; + DWORD GuardCFFunctionTable; + DWORD GuardCFFunctionCount; + DWORD GuardFlags; + BYTE CodeIntegrity[12]; + DWORD GuardAddressTakenIatEntryTable; + DWORD GuardAddressTakenIatEntryCount; + DWORD GuardLongJumpTargetTable; + DWORD GuardLongJumpTargetCount; +} IMAGE_LOAD_CONFIG_DIRECTORY32, *PIMAGE_LOAD_CONFIG_DIRECTORY32; + +typedef struct _IMAGE_LOAD_CONFIG_DIRECTORY64 +{ + DWORD Characteristics; + DWORD TimeDateStamp; + WORD MajorVersion; + WORD MinorVersion; + DWORD GlobalFlagsClear; + DWORD GlobalFlagsSet; + DWORD CriticalSectionDefaultTimeout; + ULONGLONG DeCommitFreeBlockThreshold; + ULONGLONG DeCommitTotalFreeThreshold; + ULONGLONG LockPrefixTable; + ULONGLONG MaximumAllocationSize; + ULONGLONG VirtualMemoryThreshold; + ULONGLONG ProcessAffinityMask; + DWORD ProcessHeapFlags; + WORD CSDVersion; + WORD Reserved; + ULONGLONG EditList; + ULONGLONG SecurityCookie; + ULONGLONG SEHandlerTable; + ULONGLONG SEHandlerCount; + ULONGLONG GuardCFCheckFunctionPointer; + ULONGLONG GuardCFDispatchFunctionPointer; + ULONGLONG GuardCFFunctionTable; + ULONGLONG GuardCFFunctionCount; + DWORD GuardFlags; + BYTE CodeIntegrity[12]; + ULONGLONG GuardAddressTakenIatEntryTable; + ULONGLONG GuardAddressTakenIatEntryCount; + ULONGLONG GuardLongJumpTargetTable; + ULONGLONG GuardLongJumpTargetCount; +} IMAGE_LOAD_CONFIG_DIRECTORY64, *PIMAGE_LOAD_CONFIG_DIRECTORY64; + + #define IMAGE_DEBUG_TYPE_FPO 3 #define IMAGE_DEBUG_TYPE_MISC 4 #define IMAGE_DEBUG_TYPE_EXCEPTION 5 diff --git a/libyara/modules/pe/pe.c b/libyara/modules/pe/pe.c index e9210a7208..06d3085ba1 100644 --- a/libyara/modules/pe/pe.c +++ b/libyara/modules/pe/pe.c @@ -292,6 +292,66 @@ static void pe_parse_rich_signature(PE* pe, uint64_t base_address) yr_free(version_data); } +static void pe_parse_load_config_directory(PE* pe) +{ + PIMAGE_DATA_DIRECTORY data_dir; + // Use the 32bit version of this structure as we are only interested in + // extracting the timestamp which is at the same offset in both the 32bit and + // 64bit structures. + PIMAGE_LOAD_CONFIG_DIRECTORY32 load_config; + int64_t offset; + + data_dir = pe_get_directory_entry(pe, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG); + + if (data_dir == NULL) + return; + + if (yr_le32toh(data_dir->Size) == 0) + return; + + if (yr_le32toh(data_dir->VirtualAddress) == 0) + return; + + // Dear Future WXS, + // + // Can't do a size check here because, and I quote + // https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#load-configuration-directory + // + // + // The data directory entry for a pre-reserved SEH load configuration + // structure must specify a particular size of the load configuration + // structure because the operating system loader always expects it to be a + // certain value. In that regard, the size is really only a version check. For + // compatibility with Windows XP and earlier versions of Windows, the size + // must be 64 for x86 images. + // + // + // However, some files declare a size that is 148 bytes but are storing a + // structure that is 192 bytes. An example of this is + // f36c7873c86331db0fadecd1e9f48839bd42cdd72acf051987aee5f1b097cbbd, which is + // a Microsoft generated 64bit PE that declares the "wrong" size in the load + // configuration data directory entry. + // + // Instead of doing it just check if the 32bit structure fits after we follow + // the RVA to offset calculation. + // + // Love, + // Past WXS + offset = pe_rva_to_offset(pe, yr_le32toh(data_dir->VirtualAddress)); + if (offset < 0) + return; + + load_config = (PIMAGE_LOAD_CONFIG_DIRECTORY32) (pe->data + offset); + + if (!struct_fits_in_pe(pe, load_config, IMAGE_LOAD_CONFIG_DIRECTORY32)) + return; + + yr_set_integer( + yr_le32toh(load_config->TimeDateStamp), + pe->object, + "load_config_timestamp"); +} + static void pe_parse_debug_directory(PE* pe) { PIMAGE_DATA_DIRECTORY data_dir; @@ -335,21 +395,16 @@ static void pe_parse_debug_directory(PE* pe) if (!struct_fits_in_pe(pe, debug_dir, IMAGE_DEBUG_DIRECTORY)) break; - // Intentionally pulling out timestamps even if it isn't CODEVIEW as it is - // still useful to know. + // Intentionally pulling out timestamps and types even if it isn't CODEVIEW + // as it is still useful to know. yr_set_integer( yr_le32toh(debug_dir->TimeDateStamp), pe->object, - "debug_infos[%i].timestamp", + "debug_details[%i].timestamp", i); - // Intentionally pulling out timestamps even if it isn't CODEVIEW as it is - // still useful to know. yr_set_integer( - yr_le32toh(debug_dir->Type), - pe->object, - "debug_infos[%i].type", - i); + yr_le32toh(debug_dir->Type), pe->object, "debug_details[%i].type", i); // Increment parsed_dirs here because we have filled in part of the // structure at this index in the array, even if we can only populate other @@ -417,14 +472,14 @@ static void pe_parse_debug_directory(PE* pe) { yr_set_sized_string(pdb_path, pdb_path_len, pe->object, "pdb_path"); } - // We always parse all PDB paths for debug_infos array. + // We always parse all PDB paths for debug_details array. yr_set_sized_string( - pdb_path, pdb_path_len, pe->object, "debug_infos[%i].pdb_path", i); + pdb_path, pdb_path_len, pe->object, "debug_details[%i].pdb_path", i); } } } - yr_set_integer(parsed_dirs, pe->object, "number_of_debug_infos"); + yr_set_integer(parsed_dirs, pe->object, "number_of_debug_details"); } // Return a pointer to the resource directory string or NULL. @@ -3831,12 +3886,14 @@ begin_declarations declare_function("is_32bit", "", "i", is_32bit); declare_function("is_64bit", "", "i", is_64bit); - declare_integer("number_of_debug_infos"); - begin_struct_array("debug_infos") + declare_integer("number_of_debug_details"); + begin_struct_array("debug_details") declare_integer("type"); declare_integer("timestamp"); declare_string("pdb_path"); - end_struct_array("debug_infos"); + end_struct_array("debug_details"); + + declare_integer("load_config_timestamp"); declare_integer("number_of_imports"); declare_integer("number_of_imported_functions"); @@ -4388,6 +4445,7 @@ int module_load( pe_parse_header(pe, block->base, context->flags); pe_parse_rich_signature(pe, block->base); pe_parse_debug_directory(pe); + pe_parse_load_config_directory(pe); #if defined(HAVE_LIBCRYPTO) && !defined(BORINGSSL) pe_parse_certificates(pe); From 288407970f432b69e8b3e626f478cefb1bd44ddf Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Fri, 29 Sep 2023 16:42:11 -0400 Subject: [PATCH 3/4] Fix tests after rename. --- tests/test-pe.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/test-pe.c b/tests/test-pe.c index 51763c63ea..ea72786483 100644 --- a/tests/test-pe.c +++ b/tests/test-pe.c @@ -261,16 +261,16 @@ int main(int argc, char** argv) "import \"pe\" \ rule test { \ condition: \ - pe.number_of_debug_infos == 3 and \ - pe.debug_infos[0].type == 2 and \ - pe.debug_infos[0].timestamp == 1827812126 and \ - pe.debug_infos[0].pdb_path == \"mtxex.pdb\" and \ - pe.debug_infos[1].type == 13 and \ - pe.debug_infos[1].timestamp == 1827812126 and \ - not defined pe.debug_infos[1].pdb_path and \ - pe.debug_infos[2].type == 16 and \ - pe.debug_infos[2].timestamp == 1827812126 and \ - not defined pe.debug_infos[2].pdb_path and \ + pe.number_of_debug_details == 3 and \ + pe.debug_details[0].type == 2 and \ + pe.debug_details[0].timestamp == 1827812126 and \ + pe.debug_details[0].pdb_path == \"mtxex.pdb\" and \ + pe.debug_details[1].type == 13 and \ + pe.debug_details[1].timestamp == 1827812126 and \ + not defined pe.debug_details[1].pdb_path and \ + pe.debug_details[2].type == 16 and \ + pe.debug_details[2].timestamp == 1827812126 and \ + not defined pe.debug_details[2].pdb_path and \ pe.pdb_path == \"mtxex.pdb\" \ }", "tests/data/mtxex.dll"); @@ -496,7 +496,7 @@ int main(int argc, char** argv) * mtxex.dll is * 23e72ce7e9cdbc80c0095484ebeb02f56b21e48fd67044e69e7a2ae76db631e5, which was * taken from a Windows 10 install. The details of which are: - * + * * export_timestamp = 1827812126 * dll_name = "mtxex.dll" * number_of_exports = 4 From 645fc36ebac0a2066b194ef44a00a442a27f7b76 Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Fri, 29 Sep 2023 16:46:05 -0400 Subject: [PATCH 4/4] Add docs for debug_details and load_config_timestamp. --- docs/modules/pe.rst | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/modules/pe.rst b/docs/modules/pe.rst index 7196d029d1..cb0469aa32 100644 --- a/docs/modules/pe.rst +++ b/docs/modules/pe.rst @@ -1089,6 +1089,36 @@ Reference *Example: pe.pdb_path == "D:\\workspace\\2018_R9_RelBld\\target\\checkout\\custprof\\Release\\custprof.pdb"* +.. c:type:: debug_details + + .. versionadded:: 4.4.0 + + Array of structures containing information about the PE's debug information. + + .. c:member:: type + + Type of debug information + + .. c:member:: timestamp + + Timestamp in the debug entry. + + .. c:member:: pdb_path + + Path of the PDB file for this entry. + +.. c:type:: number_of_debug_details + + .. versionadded:: 4.4.0 + + Number of entries in the debug_details array. + +.. c:type:: load_config_timestamp + + .. versionadded:: 4.4.0 + + Timestamp pulled from the load configuration, if present. + .. c:function:: exports(function_name) Function returning true if the PE exports *function_name* or