From e60ab548d726324daf5d3814d675b62d22f95e39 Mon Sep 17 00:00:00 2001 From: Hilko Bengen Date: Mon, 14 Mar 2022 09:22:48 +0100 Subject: [PATCH 01/28] Fix logic error in Linux memory scanning (#1662) Memory-mapped files from tmpfs (major number = 0) are no longer ignored. --- libyara/proc/linux.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index 2db9e7c5a3..6d8e55b7de 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -157,8 +157,9 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) int fd = -2; // Assume mapping not connected with a file. - if (strlen(proc_info->map_path) > 0 && proc_info->map_dmaj != 0 && - proc_info->map_ino != 0) + // Only try mapping the file if it has a path and belongs to a device + if (strlen(proc_info->map_path) > 0 && + !(proc_info->map_dmaj == 0 && proc_info->map_dmin == 0)) { struct stat st; fd = open(proc_info->map_path, O_RDONLY); From 8b5d27b4be8e67e294603733463b3b5201d15004 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 17 Mar 2022 11:39:45 +0100 Subject: [PATCH 02/28] Fix wrong caching logic in "magic" module. Closes #1663. 
--- Makefile.am | 6 +++++ libyara/modules.c | 4 +--- libyara/modules/magic/magic.c | 23 ++++++++---------- tests/test-magic.c | 44 +++++++++++++++++++++++++++++++++++ 4 files changed, 61 insertions(+), 16 deletions(-) create mode 100644 tests/test-magic.c diff --git a/Makefile.am b/Makefile.am index 4ebf01d15e..ca1d615165 100644 --- a/Makefile.am +++ b/Makefile.am @@ -138,6 +138,12 @@ test_dotnet_SOURCES = tests/test-dotnet.c tests/util.c test_dotnet_LDADD = libyara/.libs/libyara.a endif +if MAGIC_MODULE +check_PROGRAMS+=test-magic +test_magic_SOURCES = tests/test-magic.c tests/util.c +test_magic_LDADD = libyara/.libs/libyara.a +endif + if PB_TESTS_MODULE check_PROGRAMS+=test-pb test_pb_SOURCES = tests/test-pb.c tests/util.c diff --git a/libyara/modules.c b/libyara/modules.c index 092fbf4f56..af43905082 100644 --- a/libyara/modules.c +++ b/libyara/modules.c @@ -177,9 +177,7 @@ int yr_modules_load(const char* module_name, YR_SCAN_CONTEXT* context) int yr_modules_unload_all(YR_SCAN_CONTEXT* context) { - int i; - - for (i = 0; i < sizeof(yr_modules_table) / sizeof(YR_MODULE); i++) + for (int i = 0; i < sizeof(yr_modules_table) / sizeof(YR_MODULE); i++) { YR_OBJECT* module_structure = (YR_OBJECT*) yr_hash_table_remove( context->objects_table, yr_modules_table[i].name, NULL); diff --git a/libyara/modules/magic/magic.c b/libyara/modules/magic/magic.c index 7a72b7d5ef..bbaad804b7 100644 --- a/libyara/modules/magic/magic.c +++ b/libyara/modules/magic/magic.c @@ -39,11 +39,9 @@ The original idea and inspiration for this module comes from Armin Buescher. #define MODULE_NAME magic - // Thread-local storage key used to store a pointer to a MAGIC_CACHE struct. 
YR_THREAD_STORAGE_KEY magic_tls; - typedef struct { magic_t magic_cookie; @@ -52,7 +50,6 @@ typedef struct } MAGIC_CACHE; - static int get_cache(MAGIC_CACHE** cache) { *cache = (MAGIC_CACHE*) yr_thread_storage_get_value(&magic_tls); @@ -88,7 +85,6 @@ static int get_cache(MAGIC_CACHE** cache) return ERROR_SUCCESS; } - define_function(magic_mime_type) { YR_SCAN_CONTEXT* context = scan_context(); @@ -111,8 +107,8 @@ define_function(magic_mime_type) { magic_setflags(cache->magic_cookie, MAGIC_MIME_TYPE); - cache->cached_mime_type = magic_buffer( - cache->magic_cookie, block_data, block->size); + cache->cached_mime_type = yr_strdup( + magic_buffer(cache->magic_cookie, block_data, block->size)); } } @@ -122,7 +118,6 @@ define_function(magic_mime_type) return_string((char*) cache->cached_mime_type); } - define_function(magic_type) { MAGIC_CACHE* cache; @@ -145,8 +140,8 @@ define_function(magic_type) { magic_setflags(cache->magic_cookie, 0); - cache->cached_type = magic_buffer( - cache->magic_cookie, block_data, block->size); + cache->cached_type = yr_strdup( + magic_buffer(cache->magic_cookie, block_data, block->size)); } } @@ -161,13 +156,11 @@ begin_declarations declare_function("type", "", "s", magic_type); end_declarations - int module_initialize(YR_MODULE* module) { return yr_thread_storage_create(&magic_tls); } - int module_finalize(YR_MODULE* module) { MAGIC_CACHE* cache = (MAGIC_CACHE*) yr_thread_storage_get_value(&magic_tls); @@ -181,7 +174,6 @@ int module_finalize(YR_MODULE* module) return yr_thread_storage_destroy(&magic_tls); } - int module_load( YR_SCAN_CONTEXT* context, YR_OBJECT* module_object, @@ -191,13 +183,18 @@ int module_load( return ERROR_SUCCESS; } - int module_unload(YR_OBJECT* module) { MAGIC_CACHE* cache = (MAGIC_CACHE*) yr_thread_storage_get_value(&magic_tls); if (cache != NULL) { + if (cache->cached_type != NULL) + yr_free((void*) cache->cached_type); + + if (cache->cached_mime_type != NULL) + yr_free((void*) cache->cached_mime_type); + 
cache->cached_type = NULL; cache->cached_mime_type = NULL; } diff --git a/tests/test-magic.c b/tests/test-magic.c new file mode 100644 index 0000000000..adb0971e70 --- /dev/null +++ b/tests/test-magic.c @@ -0,0 +1,44 @@ +#include +#include +#include + +#include "blob.h" +#include "util.h" + +int main(int argc, char** argv) +{ + int result = 0; + + YR_DEBUG_INITIALIZE(); + YR_DEBUG_FPRINTF(1, stderr, "+ %s() { // in %s\n", __FUNCTION__, argv[0]); + + init_top_srcdir(); + + yr_initialize(); + + assert_true_rule_blob( + "import \"magic\" rule test { condition: \ + magic.type() contains \"ELF\" }", + ELF32_FILE); + + assert_true_rule_blob( + "import \"magic\" rule test { condition: \ + magic.type() contains \"PE32\" and \ + magic.mime_type() == \"application/x-dosexec\" }", + PE32_FILE); + + // Test case for https://github.com/VirusTotal/yara/issues/1663 + assert_true_rule_blob( + "import \"magic\" rule test { condition: \ + magic.type() contains \"Mach-O\" and \ + magic.mime_type() == \"application/x-mach-binary\" and \ + magic.type() contains \"Mach-O\"}", + MACHO_X86_FILE); + + yr_finalize(); + + YR_DEBUG_FPRINTF( + 1, stderr, "} = %d // %s() in %s\n", result, __FUNCTION__, argv[0]); + + return result; +} From 18ae3fdfcea373ed7fa6e8fe0440d08c6ae6781e Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 1 Apr 2022 10:07:28 +0200 Subject: [PATCH 03/28] Fix use of uninitialized variable. Closes #1672. 
--- libyara/proc/linux.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index 6d8e55b7de..a9e93c2224 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -324,8 +324,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( if (proc_info->next_block_end <= current_begin) { - int n, path_start; - char *p; + int path_start, n = 0; + char* p; while (fgets(buffer, sizeof(buffer), proc_info->maps) != NULL) { From 06747122f341c6dd9b4b769fd790343f8f870e4d Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 4 Apr 2022 10:05:08 +0200 Subject: [PATCH 04/28] Fix #1674. --- libyara/hex_lexer.c | 7 ++++++- libyara/hex_lexer.l | 7 ++++++- libyara/lexer.c | 26 +++++++++++++++++++++----- libyara/lexer.l | 26 +++++++++++++++++++++----- libyara/re_lexer.c | 7 ++++++- libyara/re_lexer.l | 7 ++++++- 6 files changed, 66 insertions(+), 14 deletions(-) diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c index dfe5a921d4..cd525615a5 100644 --- a/libyara/hex_lexer.c +++ b/libyara/hex_lexer.c @@ -2484,7 +2484,12 @@ int yr_parse_hex_string( (*re_ast)->flags |= RE_FLAGS_DOT_ALL; - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(hex_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l index b913a9f925..adac542902 100644 --- a/libyara/hex_lexer.l +++ b/libyara/hex_lexer.l @@ -262,7 +262,12 @@ int yr_parse_hex_string( (*re_ast)->flags |= RE_FLAGS_DOT_ALL; - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(hex_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/lexer.c b/libyara/lexer.c index 1a8ed11fbf..3e1f977767 100644 --- a/libyara/lexer.c +++ b/libyara/lexer.c @@ 
-3588,10 +3588,15 @@ int yr_lex_parse_rules_string( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -3615,10 +3620,15 @@ int yr_lex_parse_rules_file( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -3684,7 +3694,13 @@ int yr_lex_parse_rules_fd( return compiler->errors; } - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_free(buffer); + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; + return compiler->errors; + } #if YYDEBUG yydebug = 1; diff --git a/libyara/lexer.l b/libyara/lexer.l index 5556f96952..637efc970e 100644 --- a/libyara/lexer.l +++ b/libyara/lexer.l @@ -896,10 +896,15 @@ int yr_lex_parse_rules_string( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -923,10 +928,15 @@ int yr_lex_parse_rules_file( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -992,7 +1002,13 @@ int 
yr_lex_parse_rules_fd( return compiler->errors; } - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_free(buffer); + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; + return compiler->errors; + } #if YYDEBUG yydebug = 1; diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c index bf5520f351..7a5b2e14e6 100644 --- a/libyara/re_lexer.c +++ b/libyara/re_lexer.c @@ -2871,7 +2871,12 @@ int yr_parse_re_string( FAIL_ON_ERROR(yr_re_ast_create(re_ast)); - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(re_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l index 9a19410c5b..6a9dd1e632 100644 --- a/libyara/re_lexer.l +++ b/libyara/re_lexer.l @@ -629,7 +629,12 @@ int yr_parse_re_string( FAIL_ON_ERROR(yr_re_ast_create(re_ast)); - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(re_string, yyscanner); yyparse(yyscanner, &lex_env); From db2de269092c3f7681d43bf54fcad577e24dd63d Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 4 Apr 2022 11:41:36 +0200 Subject: [PATCH 05/28] Fix #1673. 
--- libyara/compiler.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libyara/compiler.c b/libyara/compiler.c index 34f141294d..4bfbf9bce0 100644 --- a/libyara/compiler.c +++ b/libyara/compiler.c @@ -286,22 +286,28 @@ YR_API int yr_compiler_create(YR_COMPILER** compiler) YR_API void yr_compiler_destroy(YR_COMPILER* compiler) { - yr_arena_release(compiler->arena); + if (compiler->arena != NULL) + yr_arena_release(compiler->arena); if (compiler->automaton != NULL) yr_ac_automaton_destroy(compiler->automaton); - yr_hash_table_destroy(compiler->rules_table, NULL); + if (compiler->rules_table != NULL) + yr_hash_table_destroy(compiler->rules_table, NULL); - yr_hash_table_destroy(compiler->strings_table, NULL); + if (compiler->strings_table != NULL) + yr_hash_table_destroy(compiler->strings_table, NULL); - yr_hash_table_destroy(compiler->wildcard_identifiers_table, NULL); + if (compiler->wildcard_identifiers_table != NULL) + yr_hash_table_destroy(compiler->wildcard_identifiers_table, NULL); - yr_hash_table_destroy(compiler->sz_table, NULL); + if (compiler->sz_table != NULL) + yr_hash_table_destroy(compiler->sz_table, NULL); - yr_hash_table_destroy( - compiler->objects_table, - (YR_HASH_TABLE_FREE_VALUE_FUNC) yr_object_destroy); + if (compiler->objects_table != NULL) + yr_hash_table_destroy( + compiler->objects_table, + (YR_HASH_TABLE_FREE_VALUE_FUNC) yr_object_destroy); if (compiler->atoms_config.free_quality_table) yr_free(compiler->atoms_config.quality_table); From a66532c7878fbe655ac9cdcc2b783e34ce769259 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 4 Apr 2022 13:15:30 +0200 Subject: [PATCH 06/28] Fix #1675. 
--- libyara/object.c | 51 ++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/libyara/object.c b/libyara/object.c index 970cf5a9a0..b0a656a7c1 100644 --- a/libyara/object.c +++ b/libyara/object.c @@ -41,6 +41,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +//////////////////////////////////////////////////////////////////////////////// +// Creates a new object with the given type and identifier. If a parent is +// specified the new object is owned by the parent and it will be destroyed when +// the parent is destroyed. You must not call yr_object_destroy on an objected +// that has a parent, you should destroy the parent instead. +// int yr_object_create( int8_t type, const char* identifier, @@ -48,7 +54,6 @@ int yr_object_create( YR_OBJECT** object) { YR_OBJECT* obj; - int i; size_t object_size = 0; assert(parent != NULL || object != NULL); @@ -114,7 +119,7 @@ int yr_object_create( break; case OBJECT_TYPE_FUNCTION: object_as_function(obj)->return_obj = NULL; - for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) + for (int i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) { object_as_function(obj)->prototypes[i].arguments_fmt = NULL; object_as_function(obj)->prototypes[i].code = NULL; @@ -186,7 +191,6 @@ int yr_object_function_create( YR_OBJECT_FUNCTION* f = NULL; int8_t return_type; - int i; // The parent of a function must be a structure. assert(parent != NULL && parent->type == OBJECT_TYPE_STRUCTURE); @@ -219,14 +223,15 @@ int yr_object_function_create( FAIL_ON_ERROR( yr_object_create(OBJECT_TYPE_FUNCTION, identifier, parent, &o)); - FAIL_ON_ERROR_WITH_CLEANUP( - yr_object_create(return_type, "result", o, &return_obj), - yr_object_destroy(o)); + // In case of failure while creating return_obj we don't need to free the + // previously created "o" object, as it is already associated with its + // parent and will be destroyed when the parent is destroyed. 
+ FAIL_ON_ERROR(yr_object_create(return_type, "result", o, &return_obj)); f = object_as_function(o); } - for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) + for (int i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) { if (f->prototypes[i].arguments_fmt == NULL) { @@ -306,6 +311,10 @@ int yr_object_from_external_variable( return result; } +//////////////////////////////////////////////////////////////////////////////// +// Destroy an objects, and any other object that is a child of it. For example, +// destroying a struct will destroy all its members. +// void yr_object_destroy(YR_OBJECT* object) { YR_STRUCTURE_MEMBER* member; @@ -313,8 +322,6 @@ void yr_object_destroy(YR_OBJECT* object) YR_ARRAY_ITEMS* array_items; YR_DICTIONARY_ITEMS* dict_items; - int i; - if (object == NULL) return; @@ -345,7 +352,7 @@ void yr_object_destroy(YR_OBJECT* object) if (array_items != NULL) { - for (i = 0; i < array_items->length; i++) + for (int i = 0; i < array_items->length; i++) if (array_items->objects[i] != NULL) yr_object_destroy(array_items->objects[i]); } @@ -361,7 +368,7 @@ void yr_object_destroy(YR_OBJECT* object) if (dict_items != NULL) { - for (i = 0; i < dict_items->used; i++) + for (int i = 0; i < dict_items->used; i++) { if (dict_items->objects[i].key != NULL) yr_free(dict_items->objects[i].key); @@ -531,8 +538,6 @@ int yr_object_copy(YR_OBJECT* object, YR_OBJECT** object_copy) YR_STRUCTURE_MEMBER* structure_member; - int i; - *object_copy = NULL; FAIL_ON_ERROR( @@ -568,7 +573,7 @@ int yr_object_copy(YR_OBJECT* object, YR_OBJECT** object_copy) // cleanup yr_object_destroy(copy)); - for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) + for (int i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) object_as_function(copy)->prototypes[i] = object_as_function(object)->prototypes[i]; @@ -691,7 +696,6 @@ int yr_object_array_set_item(YR_OBJECT* object, YR_OBJECT* item, int index) { YR_OBJECT_ARRAY* array; - int i; int capacity; assert(index >= 0); @@ -728,7 +732,7 @@ int 
yr_object_array_set_item(YR_OBJECT* object, YR_OBJECT* item, int index) if (array->items == NULL) return ERROR_INSUFFICIENT_MEMORY; - for (i = array->items->capacity; i < capacity; i++) + for (int i = array->items->capacity; i < capacity; i++) array->items->objects[i] = NULL; array->items->capacity = capacity; @@ -748,8 +752,6 @@ YR_OBJECT* yr_object_dict_get_item( int flags, const char* key) { - int i; - YR_OBJECT* result = NULL; YR_OBJECT_DICTIONARY* dict; @@ -759,7 +761,7 @@ YR_OBJECT* yr_object_dict_get_item( if (dict->items != NULL) { - for (i = 0; i < dict->items->used; i++) + for (int i = 0; i < dict->items->used; i++) { if (strcmp(dict->items->objects[i].key->c_string, key) == 0) result = dict->items->objects[i].obj; @@ -781,7 +783,6 @@ int yr_object_dict_set_item(YR_OBJECT* object, YR_OBJECT* item, const char* key) { YR_OBJECT_DICTIONARY* dict; - int i; int count; assert(object->type == OBJECT_TYPE_DICTIONARY); @@ -813,7 +814,7 @@ int yr_object_dict_set_item(YR_OBJECT* object, YR_OBJECT* item, const char* key) if (dict->items == NULL) return ERROR_INSUFFICIENT_MEMORY; - for (i = dict->items->used; i < count; i++) + for (int i = dict->items->used; i < count; i++) { dict->items->objects[i].key = NULL; dict->items->objects[i].obj = NULL; @@ -1072,7 +1073,6 @@ YR_API void yr_object_print_data( YR_STRUCTURE_MEMBER* member; char indent_spaces[32]; - int i; indent = yr_min(indent, sizeof(indent_spaces) - 1); @@ -1105,10 +1105,9 @@ YR_API void yr_object_print_data( if (object->value.ss != NULL) { - size_t l; printf(" = \""); - for (l = 0; l < object->value.ss->length; l++) + for (size_t l = 0; l < object->value.ss->length; l++) { char c = object->value.ss->c_string[l]; @@ -1144,7 +1143,7 @@ YR_API void yr_object_print_data( break; case OBJECT_TYPE_ARRAY: - for (i = 0; i < yr_object_array_length(object); i++) + for (int i = 0; i < yr_object_array_length(object); i++) { YR_OBJECT* o = yr_object_array_get_item(object, 0, i); @@ -1162,7 +1161,7 @@ YR_API void 
yr_object_print_data( if (dict_items != NULL) { - for (i = 0; i < dict_items->used; i++) + for (int i = 0; i < dict_items->used; i++) { printf("\n%s\t%s", indent_spaces, dict_items->objects[i].key->c_string); From 90c44ddcf633cd111cae896d40bcffe30a8ac0bb Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 5 Apr 2022 13:07:41 +0200 Subject: [PATCH 07/28] Fix #1671 iterator->last_error is now initialized to ERROR_SUCCESS in yr_process_open_iterator. Additionally, if yr_process_get_first_memory_block returns NULL, iterator->last_error is set to ERROR_COULD_NOT_READ_PROCESS_MEMORY. --- libyara/proc.c | 2 ++ libyara/proc/freebsd.c | 11 ++++++++--- libyara/proc/linux.c | 9 ++++++--- libyara/proc/mach.c | 7 ++++++- libyara/proc/openbsd.c | 11 ++++++++--- libyara/proc/windows.c | 7 ++++++- 6 files changed, 36 insertions(+), 11 deletions(-) diff --git a/libyara/proc.c b/libyara/proc.c index 2c3ccce4e3..089c25698f 100644 --- a/libyara/proc.c +++ b/libyara/proc.c @@ -54,6 +54,8 @@ YR_API int yr_process_open_iterator(int pid, YR_MEMORY_BLOCK_ITERATOR* iterator) iterator->context = context; iterator->first = yr_process_get_first_memory_block; iterator->next = yr_process_get_next_memory_block; + iterator->last_error = ERROR_SUCCESS; + // In a process scan file size is undefined, when the file_size function is // set to NULL the value returned by the filesize keyword is YR_UNDEFINED. 
iterator->file_size = NULL; diff --git a/libyara/proc/freebsd.c b/libyara/proc/freebsd.c index a8e2c4b94d..54d3a445f8 100644 --- a/libyara/proc/freebsd.c +++ b/libyara/proc/freebsd.c @@ -138,8 +138,6 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( proc_info->vm_entry.pve_path = buf; proc_info->vm_entry.pve_pathlen = sizeof(buf); - iterator->last_error = ERROR_SUCCESS; - uint64_t current_begin = context->current_block.base + context->current_block.size; @@ -148,6 +146,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( yr_get_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, &max_process_memory_chunk); + iterator->last_error = ERROR_SUCCESS; + if (proc_info->vm_entry.pve_end <= current_begin) { if (ptrace( @@ -180,7 +180,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( proc_info->vm_entry.pve_entry = 0; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index a9e93c2224..74fbf837c5 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -312,8 +312,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( char buffer[PATH_MAX]; char perm[5]; - uint64_t begin, end; + uint64_t begin, end; uint64_t current_begin = context->current_block.base + context->current_block.size; @@ -322,6 +322,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( yr_get_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, &max_process_memory_chunk); + iterator->last_error = ERROR_SUCCESS; + if (proc_info->next_block_end <= current_begin) { int path_start, n = 0; @@ -399,8 +401,6 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( assert(context->current_block.size > 0); - iterator->last_error = ERROR_SUCCESS; - YR_DEBUG_FPRINTF( 2, stderr, @@ -434,6 +434,9 @@ YR_API 
YR_MEMORY_BLOCK* yr_process_get_first_memory_block( _exit: + if (result == NULL) + iterator->last_result = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + YR_DEBUG_FPRINTF(2, stderr, "} = %p // %s()\n", result, __FUNCTION__); return result; diff --git a/libyara/proc/mach.c b/libyara/proc/mach.c index 52e0df2e34..8ed3f56658 100644 --- a/libyara/proc/mach.c +++ b/libyara/proc/mach.c @@ -175,7 +175,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( context->current_block.base = 0; context->current_block.size = 0; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/proc/openbsd.c b/libyara/proc/openbsd.c index 1cba6fae60..8e2d0c311d 100644 --- a/libyara/proc/openbsd.c +++ b/libyara/proc/openbsd.c @@ -150,8 +150,6 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( int mib[] = {CTL_KERN, KERN_PROC_VMMAP, proc_info->pid}; size_t len = sizeof(struct kinfo_vmentry); - iterator->last_error = ERROR_SUCCESS; - uint64_t current_begin = context->current_block.base + context->current_block.size; @@ -160,6 +158,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( yr_get_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, &max_process_memory_chunk); + iterator->last_error = ERROR_SUCCESS; + if (proc_info->old_end <= current_begin) { if (sysctl(mib, 3, &proc_info->vm_entry, &len, NULL, 0) < 0) @@ -192,7 +192,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( proc_info->vm_entry.kve_start = 0; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/proc/windows.c b/libyara/proc/windows.c index 3668cff8a5..14780e175b 100644 --- 
a/libyara/proc/windows.c +++ b/libyara/proc/windows.c @@ -185,7 +185,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( context->current_block.base = (size_t) proc_info->si.lpMinimumApplicationAddress; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif From f1007df11ca176fedfdc1888627a81370349948d Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 5 Apr 2022 13:49:09 +0200 Subject: [PATCH 08/28] Fix wrong field name. --- libyara/proc/linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index 74fbf837c5..7f1536712b 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -435,7 +435,7 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( _exit: if (result == NULL) - iterator->last_result = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; YR_DEBUG_FPRINTF(2, stderr, "} = %p // %s()\n", result, __FUNCTION__); From cf3e556478ef5ef0927d31db3746a6d02adf64a3 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 6 Apr 2022 11:11:45 +0200 Subject: [PATCH 09/28] Implement the --skip-larger command-line option in Windows. (#1678) Also allow a 64-bit integer as the argument to --skip-larger on both Linux and Windows. 
--- cli/args.c | 19 ++++++++++-- cli/args.h | 82 ++++++++++++++++++++++++--------------------------- cli/unicode.h | 39 ++++++++++++------------ cli/yara.c | 44 +++++++++++++++++++-------- cli/yarac.c | 2 +- 5 files changed, 108 insertions(+), 78 deletions(-) diff --git a/cli/args.c b/cli/args.c index f6e870af5f..939f3e8cd3 100644 --- a/cli/args.c +++ b/cli/args.c @@ -96,7 +96,7 @@ args_error_type_t args_parse_option( *(bool*) opt->value = !(*(bool*) opt->value); break; - case ARGS_OPT_INTEGER: + case ARGS_OPT_LONG: if (opt_arg == NULL) return ARGS_ERROR_REQUIRED_INTEGER_ARG; @@ -110,6 +110,20 @@ args_error_type_t args_parse_option( break; + case ARGS_OPT_LONG_LONG: + if (opt_arg == NULL) + return ARGS_ERROR_REQUIRED_INTEGER_ARG; + + *(long long*) opt->value = _tcstoll(opt_arg, &endptr, 0); + + if (*endptr != '\0') + return ARGS_ERROR_REQUIRED_INTEGER_ARG; + + if (opt_arg_was_used != NULL) + *opt_arg_was_used = 1; + + break; + case ARGS_OPT_STRING: if (opt_arg == NULL) return ARGS_ERROR_REQUIRED_STRING_ARG; @@ -268,7 +282,8 @@ void args_print_usage(args_option_t* options, int help_alignment) if (options->long_name != NULL) len += _stprintf(buffer + len, _T("--%s"), options->long_name); - if (options->type == ARGS_OPT_STRING || options->type == ARGS_OPT_INTEGER) + if (options->type == ARGS_OPT_STRING || options->type == ARGS_OPT_LONG || + options->type == ARGS_OPT_LONG_LONG) { len += _stprintf( buffer + len, diff --git a/cli/args.h b/cli/args.h index eca4bba6e5..e631219240 100644 --- a/cli/args.h +++ b/cli/args.h @@ -33,57 +33,59 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include "unicode.h" - #ifdef __cplusplus extern "C" { #endif -typedef enum _args_error_type -{ + typedef enum _args_error_type + { ARGS_ERROR_OK, ARGS_ERROR_UNKNOWN_OPT, ARGS_ERROR_TOO_MANY, ARGS_ERROR_REQUIRED_INTEGER_ARG, ARGS_ERROR_REQUIRED_STRING_ARG, ARGS_ERROR_UNEXPECTED_ARG, -} args_error_type_t; - - -typedef enum _args_option_type -{ - // special - ARGS_OPT_END, - ARGS_OPT_GROUP, - // options with no arguments - ARGS_OPT_BOOLEAN, - // options with arguments (optional or required) - ARGS_OPT_INTEGER, - ARGS_OPT_STRING, -} args_option_type_t; - - -typedef struct _args_option -{ - args_option_type_t type; - const char_t short_name; - const char_t *long_name; - void *value; - int max_count; - const char_t *help; - const char_t *type_help; - int count; -} args_option_t; - + } args_error_type_t; + + typedef enum _args_option_type + { + // special + ARGS_OPT_END, + ARGS_OPT_GROUP, + // options with no arguments + ARGS_OPT_BOOLEAN, + // options with arguments (optional or required) + ARGS_OPT_LONG, + ARGS_OPT_LONG_LONG, + ARGS_OPT_STRING, + } args_option_type_t; + + typedef struct _args_option + { + args_option_type_t type; + const char_t short_name; + const char_t *long_name; + void *value; + int max_count; + const char_t *help; + const char_t *type_help; + int count; + } args_option_t; #define OPT_BOOLEAN(short_name, long_name, value, ...) \ { \ ARGS_OPT_BOOLEAN, short_name, long_name, value, 1, __VA_ARGS__ \ } -#define OPT_INTEGER(short_name, long_name, value, ...) \ - { \ - ARGS_OPT_INTEGER, short_name, long_name, value, 1, __VA_ARGS__ \ +#define OPT_LONG(short_name, long_name, value, ...) \ + { \ + ARGS_OPT_LONG, short_name, long_name, value, 1, __VA_ARGS__ \ + } + +#define OPT_LONG_LONG(short_name, long_name, value, ...) \ + { \ + ARGS_OPT_LONG_LONG, short_name, long_name, value, 1, __VA_ARGS__ \ } #define OPT_STRING_MULTI(short_name, long_name, value, max_count, ...) 
\ @@ -99,17 +101,11 @@ typedef struct _args_option ARGS_OPT_END, 0 \ } -int args_parse( - args_option_t *options, - int argc, - const char_t **argv); + int args_parse(args_option_t *options, int argc, const char_t **argv); -void args_print_usage( - args_option_t *options, - int alignment); + void args_print_usage(args_option_t *options, int alignment); -void args_free( - args_option_t *options); + void args_free(args_option_t *options); #ifdef __cplusplus } diff --git a/cli/unicode.h b/cli/unicode.h index 5edd63ff9f..631c1f8d3e 100644 --- a/cli/unicode.h +++ b/cli/unicode.h @@ -33,14 +33,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef _MSC_VER #include #define char_t TCHAR -#define PF_S "hs" -#define PF_C "hc" +#define PF_S "hs" +#define PF_C "hc" #else #define char_t char -#define _T(x) x -#define PF_S "s" -#define PF_C "c" +#define _T(x) x +#define PF_S "s" +#define PF_C "c" #ifdef __CYGWIN__ #define _tcstok_s strtok_r @@ -48,20 +48,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define _tcstok_s strtok_s #endif -#define _tcscmp strcmp -#define _tcsdup strdup -#define _tcschr strchr -#define _tcslen strlen -#define _tcsstr strstr -#define _tcstol strtol -#define _tstoi atoi -#define _tstof atof -#define _tisdigit isdigit -#define _tfopen fopen -#define _ftprintf fprintf -#define _stprintf sprintf -#define _tprintf printf -#define _tmain main +#define _tcscmp strcmp +#define _tcsdup strdup +#define _tcschr strchr +#define _tcslen strlen +#define _tcsstr strstr +#define _tcstol strtol +#define _tcstoll strtoll +#define _tstoi atoi +#define _tstof atof +#define _tisdigit isdigit +#define _tfopen fopen +#define _ftprintf fprintf +#define _stprintf sprintf +#define _tprintf printf +#define _tmain main #define _sntprintf snprintf #endif diff --git a/cli/yara.c b/cli/yara.c index 598fc91ab9..f8857a0c20 100644 --- a/cli/yara.c +++ b/cli/yara.c @@ -161,10 +161,10 @@ static long total_count = 0; static long limit = 0; static long timeout = 1000000; static long stack_size = DEFAULT_STACK_SIZE; -static long skip_larger = 0; static long threads = YR_MAX_THREADS; static long max_strings_per_rule = DEFAULT_MAX_STRINGS_PER_RULE; static long max_process_memory_chunk = DEFAULT_MAX_PROCESS_MEMORY_CHUNK; +static long long skip_larger = 0; #define USAGE_STRING \ "Usage: yara [OPTION]... [NAMESPACE:]RULES_FILE... 
FILE | DIR | PID" @@ -215,7 +215,7 @@ args_option_t options[] = { _T("print only rules named IDENTIFIER"), _T("IDENTIFIER")), - OPT_INTEGER( + OPT_LONG( 0, _T("max-process-memory-chunk"), &max_process_memory_chunk, @@ -223,14 +223,14 @@ args_option_t options[] = { _T(" (default=1073741824)"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 'l', _T("max-rules"), &limit, _T("abort scanning after matching a NUMBER of rules"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 0, _T("max-strings-per-rule"), &max_strings_per_rule, @@ -310,14 +310,14 @@ args_option_t options[] = { &scan_list_search, _T("scan files listed in FILE, one per line")), - OPT_INTEGER( + OPT_LONG_LONG( 'z', _T("skip-larger"), &skip_larger, _T("skip files larger than the given size when scanning a directory"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 'k', _T("stack-size"), &stack_size, @@ -332,14 +332,14 @@ args_option_t options[] = { _T("print only rules tagged as TAG"), _T("TAG")), - OPT_INTEGER( + OPT_LONG( 'p', _T("threads"), &threads, _T("use the specified NUMBER of threads to scan a directory"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 'a', _T("timeout"), &timeout, @@ -478,7 +478,25 @@ static int scan_dir(const char_t* dir, SCAN_OPTIONS* scan_opts) if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - result = file_queue_put(path, scan_opts->deadline); + LARGE_INTEGER file_size; + + file_size.HighPart = FindFileData.nFileSizeHigh; + file_size.LowPart = FindFileData.nFileSizeLow; + + if (skip_larger > file_size.QuadPart || skip_larger <= 0) + { + result = file_queue_put(path, scan_opts->deadline); + } + else + { + _ftprintf( + stderr, + _T("skipping %s (%" PRIu64 + " bytes) because it's larger than %lld bytes.\n"), + path, + file_size.QuadPart, + skip_larger); + } } else if ( scan_opts->recursive_search && @@ -670,7 +688,7 @@ static int scan_dir(const char* dir, SCAN_OPTIONS* scan_opts) { fprintf( stderr, - "skipping %s (%" PRId64 " bytes) because it's larger than %ld" + "skipping %s (%" 
PRId64 " bytes) because it's larger than %lld" " bytes.\n", full_path, st.st_size, @@ -1199,7 +1217,7 @@ static int callback( return CALLBACK_CONTINUE; case CALLBACK_MSG_CONSOLE_LOG: - _tprintf(_T("%"PF_S"\n"), (char*) message_data); + _tprintf(_T("%" PF_S "\n"), (char*) message_data); return CALLBACK_CONTINUE; } @@ -1393,10 +1411,10 @@ int _tmain(int argc, const char_t** argv) exit_with_code(EXIT_FAILURE); } - yr_set_configuration_uint32(YR_CONFIG_STACK_SIZE, stack_size); + yr_set_configuration_uint32(YR_CONFIG_STACK_SIZE, (uint32_t) stack_size); yr_set_configuration_uint32( - YR_CONFIG_MAX_STRINGS_PER_RULE, max_strings_per_rule); + YR_CONFIG_MAX_STRINGS_PER_RULE, (uint32_t) max_strings_per_rule); yr_set_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, max_process_memory_chunk); diff --git a/cli/yarac.c b/cli/yarac.c index ea3778eb7a..8144077400 100644 --- a/cli/yarac.c +++ b/cli/yarac.c @@ -103,7 +103,7 @@ args_option_t options[] = { OPT_BOOLEAN('h', _T("help"), &show_help, _T("show this help and exit")), - OPT_INTEGER( + OPT_LONG( 0, _T("max-strings-per-rule"), &max_strings_per_rule, From bbd854e6701ae588be0fc28e8fe77ad4fa245d60 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Wed, 6 Apr 2022 13:55:52 +0200 Subject: [PATCH 10/28] In case of error in yr_parse_(re|hex)_string set the returned pointer to NULL. As @1ndahous3 highlighted in #1674, not setting the pointer to NULL leads to a dangling pointer. 
--- libyara/hex_lexer.c | 1 + libyara/hex_lexer.l | 1 + libyara/re_lexer.c | 1 + libyara/re_lexer.l | 1 + 4 files changed, 4 insertions(+) diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c index cd525615a5..f3b26343bd 100644 --- a/libyara/hex_lexer.c +++ b/libyara/hex_lexer.c @@ -2487,6 +2487,7 @@ int yr_parse_hex_string( if (yylex_init(&yyscanner) != 0) { yr_re_ast_destroy(*re_ast); + *re_ast = NULL; return ERROR_INSUFFICIENT_MEMORY; } diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l index adac542902..05a1e32f08 100644 --- a/libyara/hex_lexer.l +++ b/libyara/hex_lexer.l @@ -265,6 +265,7 @@ int yr_parse_hex_string( if (yylex_init(&yyscanner) != 0) { yr_re_ast_destroy(*re_ast); + *re_ast = NULL; return ERROR_INSUFFICIENT_MEMORY; } diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c index 7a5b2e14e6..6e2a1f2bea 100644 --- a/libyara/re_lexer.c +++ b/libyara/re_lexer.c @@ -2874,6 +2874,7 @@ int yr_parse_re_string( if (yylex_init(&yyscanner) != 0) { yr_re_ast_destroy(*re_ast); + *re_ast = NULL; return ERROR_INSUFFICIENT_MEMORY; } diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l index 6a9dd1e632..d333a6d9b9 100644 --- a/libyara/re_lexer.l +++ b/libyara/re_lexer.l @@ -632,6 +632,7 @@ int yr_parse_re_string( if (yylex_init(&yyscanner) != 0) { yr_re_ast_destroy(*re_ast); + *re_ast = NULL; return ERROR_INSUFFICIENT_MEMORY; } From 1891cf1a18658636d4b85997d32e156dd0ecfe12 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 11 Apr 2022 13:08:37 +0200 Subject: [PATCH 11/28] Upgrade openssl to a more recent version when building YARA with Bazel. The previous version enters an infinite loop when calling d2i_PKCS7 with PE file 11203b5a4f18f40a86d74738c31a59c4ec1e838c177a415c46a372da49d4a51a. 
--- bazel/yara_deps.bzl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bazel/yara_deps.bzl b/bazel/yara_deps.bzl index 44b18ed21f..0d6e90d1fb 100644 --- a/bazel/yara_deps.bzl +++ b/bazel/yara_deps.bzl @@ -36,9 +36,9 @@ def yara_deps(): maybe( http_archive, name = "openssl", - url = "https://github.com/openssl/openssl/archive/OpenSSL_1_1_0h.tar.gz", - sha256 = "f56dd7d81ce8d3e395f83285bd700a1098ed5a4cb0a81ce9522e41e6db7e0389", - strip_prefix = "openssl-OpenSSL_1_1_0h", + url = "https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1n.tar.gz", + sha256 = "6b2d2440ced8c802aaa61475919f0870ec556694c466ebea460e35ea2b14839e", + strip_prefix = "openssl-OpenSSL_1_1_1n", build_file = "@com_github_virustotal_yara//:bazel/openssl.BUILD", ) maybe( From 995af9579370be1b5e2b43d08265afeeb98c901b Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 25 Apr 2022 12:27:20 +0200 Subject: [PATCH 12/28] Fix issue #1690 (#1692) Regular expressions with a character class followed by a dash (-) failed with "bad character range". For example, /[0-9]-2/ was not accepted as a valid regexp. 
--- libyara/re_lexer.c | 135 +++++++++++++++++++++------------------------ libyara/re_lexer.l | 31 ++++------- tests/test-rules.c | 3 + 3 files changed, 77 insertions(+), 92 deletions(-) diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c index 6e2a1f2bea..a7afef9a36 100644 --- a/libyara/re_lexer.c +++ b/libyara/re_lexer.c @@ -593,9 +593,9 @@ static const flex_int16_t yy_accept[45] = { 0, 0, 0, 0, 0, 30, 7, 7, 28, 6, 17, 7, 27, 29, 26, 18, 5, 3, 16, 15, 13, - 11, 9, 14, 12, 10, 8, 0, 0, 0, 0, - 25, 23, 21, 24, 22, 20, 0, 4, 0, 1, - 2, 19, 0, 0 + 11, 9, 14, 12, 10, 8, 0, 0, 0, 25, + 23, 21, 24, 22, 20, 0, 4, 0, 1, 2, + 19, 0, 0, 0 } ; static const YY_CHAR yy_ec[256] = @@ -632,61 +632,57 @@ static const YY_CHAR yy_ec[256] = static const YY_CHAR yy_meta[23] = { 0, - 1, 2, 1, 1, 3, 4, 4, 4, 4, 1, - 1, 1, 1, 5, 1, 4, 4, 1, 1, 1, + 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, + 1, 1, 1, 3, 1, 2, 2, 1, 1, 1, 1, 1 } ; -static const flex_int16_t yy_base[51] = +static const flex_int16_t yy_base[50] = { 0, - 0, 20, 3, 5, 50, 89, 89, 89, 10, 36, - 0, 44, 43, 47, 38, 89, 26, 33, 89, 89, - 89, 89, 89, 89, 89, 89, 4, 5, 0, 33, - 32, 31, 29, 26, 24, 23, 15, 89, 8, 89, - 89, 89, 0, 89, 67, 72, 77, 82, 84, 4 + 0, 20, 3, 5, 37, 76, 76, 76, 10, 36, + 0, 31, 29, 47, 76, 76, 17, 23, 76, 76, + 76, 76, 76, 76, 76, 76, 4, 5, 0, 76, + 76, 76, 76, 76, 76, 0, 76, 8, 76, 76, + 76, 0, 23, 76, 67, 70, 73, 18, 6 } ; -static const flex_int16_t yy_def[51] = +static const flex_int16_t yy_def[50] = { 0, 45, 45, 46, 46, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 47, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 44, 44, 48, 44, - 44, 44, 44, 44, 44, 44, 49, 44, 44, 44, - 44, 44, 50, 0, 44, 44, 44, 44, 44, 44 + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 47, 44, + 44, 44, 44, 44, 44, 48, 44, 44, 44, 44, + 44, 49, 44, 0, 44, 44, 44, 44, 44 } ; -static const flex_int16_t yy_nxt[112] = +static const flex_int16_t yy_nxt[99] = { 0, - 44, 7, 8, 27, 13, 28, 13, 30, 27, 39, - 28, 9, 10, 39, 8, 
14, 15, 14, 15, 29, - 11, 7, 8, 16, 17, 40, 41, 29, 29, 40, - 29, 9, 10, 29, 8, 29, 29, 29, 18, 38, - 11, 18, 29, 19, 20, 21, 22, 29, 29, 44, - 44, 23, 24, 25, 26, 31, 32, 33, 44, 44, - 44, 44, 44, 34, 35, 36, 37, 6, 6, 6, - 6, 6, 12, 12, 12, 12, 12, 30, 44, 30, - 30, 30, 42, 42, 42, 42, 43, 43, 5, 44, + 44, 7, 8, 27, 13, 28, 13, 43, 27, 38, + 28, 9, 10, 38, 8, 14, 15, 14, 15, 42, + 11, 7, 8, 16, 17, 39, 40, 29, 18, 39, + 37, 9, 10, 29, 8, 29, 44, 44, 44, 44, + 11, 18, 44, 19, 20, 21, 22, 44, 44, 44, + 44, 23, 24, 25, 26, 30, 31, 32, 44, 44, + 44, 44, 44, 33, 34, 35, 36, 6, 6, 6, + 12, 12, 12, 41, 41, 5, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44 - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, - 44 } ; -static const flex_int16_t yy_chk[112] = +static const flex_int16_t yy_chk[99] = { 0, - 0, 1, 1, 11, 3, 11, 4, 50, 28, 27, - 28, 1, 1, 39, 1, 3, 3, 4, 4, 37, - 1, 2, 2, 9, 9, 27, 28, 36, 35, 39, - 34, 2, 2, 33, 2, 32, 31, 30, 18, 17, - 2, 10, 15, 10, 10, 10, 10, 13, 12, 5, + 0, 1, 1, 11, 3, 11, 4, 49, 28, 27, + 28, 1, 1, 38, 1, 3, 3, 4, 4, 48, + 1, 2, 2, 9, 9, 27, 28, 43, 18, 38, + 17, 2, 2, 13, 2, 12, 5, 0, 0, 0, + 2, 10, 0, 10, 10, 10, 10, 0, 0, 0, 0, 10, 10, 10, 10, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, 14, 45, 45, 45, - 45, 45, 46, 46, 46, 46, 46, 47, 0, 47, - 47, 47, 48, 48, 48, 48, 49, 49, 44, 44, + 46, 46, 46, 47, 47, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44 - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, - 44 } ; /* Table of booleans, true if rule could match eol. 
*/ @@ -791,10 +787,10 @@ int read_escaped_char( yyscan_t yyscanner, uint8_t* escaped_char); -#line 794 "re_lexer.c" +#line 790 "re_lexer.c" #define YY_NO_UNISTD_H 1 -#line 797 "re_lexer.c" +#line 793 "re_lexer.c" #define INITIAL 0 #define char_class 1 @@ -1070,7 +1066,7 @@ YY_DECL #line 111 "re_lexer.l" -#line 1073 "re_lexer.c" +#line 1069 "re_lexer.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -1378,6 +1374,11 @@ YY_RULE_SETUP { // A range inside a character class. + // + // Matches an hex digit (i.e: \x01), or any character that is + // not ] or \, followed by -, followed by any character that is + // not ] + // // [abc0-9] // ^- matching here @@ -1422,67 +1423,55 @@ YY_RULE_SETUP YY_BREAK case 20: YY_RULE_SETUP -#line 357 "re_lexer.l" +#line 362 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= word_chars[i]; } YY_BREAK case 21: YY_RULE_SETUP -#line 366 "re_lexer.l" +#line 369 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~word_chars[i]; } YY_BREAK case 22: YY_RULE_SETUP -#line 375 "re_lexer.l" +#line 376 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= space_chars[i]; } YY_BREAK case 23: YY_RULE_SETUP -#line 384 "re_lexer.l" +#line 383 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~space_chars[i]; } YY_BREAK case 24: YY_RULE_SETUP -#line 393 "re_lexer.l" +#line 390 "re_lexer.l" { - char c; - - for (c = '0'; c <= '9'; c++) + for (char c = '0'; c <= '9'; c++) LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } YY_BREAK case 25: YY_RULE_SETUP -#line 402 "re_lexer.l" +#line 397 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) { // digits 0-7 are in the sixth byte of the vector, let that byte alone if (i == 6) @@ -1499,7 +1488,7 @@ 
YY_RULE_SETUP YY_BREAK case 26: YY_RULE_SETUP -#line 422 "re_lexer.l" +#line 415 "re_lexer.l" { uint8_t c; @@ -1517,7 +1506,7 @@ YY_RULE_SETUP YY_BREAK case 27: YY_RULE_SETUP -#line 438 "re_lexer.l" +#line 431 "re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1535,7 +1524,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(char_class): -#line 455 "re_lexer.l" +#line 448 "re_lexer.l" { // End of regexp reached while scanning a character class. @@ -1546,7 +1535,7 @@ case YY_STATE_EOF(char_class): YY_BREAK case 28: YY_RULE_SETUP -#line 464 "re_lexer.l" +#line 457 "re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1561,7 +1550,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(INITIAL): -#line 478 "re_lexer.l" +#line 471 "re_lexer.l" { yyterminate(); @@ -1569,10 +1558,10 @@ case YY_STATE_EOF(INITIAL): YY_BREAK case 29: YY_RULE_SETUP -#line 483 "re_lexer.l" +#line 476 "re_lexer.l" ECHO; YY_BREAK -#line 1575 "re_lexer.c" +#line 1564 "re_lexer.c" case YY_END_OF_BUFFER: { @@ -2721,7 +2710,7 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 483 "re_lexer.l" +#line 476 "re_lexer.l" int escaped_char_value( diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l index d333a6d9b9..42406ec089 100644 --- a/libyara/re_lexer.l +++ b/libyara/re_lexer.l @@ -308,9 +308,14 @@ hex_digit [0-9a-fA-F] -(\\x{hex_digit}{2}|\\.|[^\\])\-[^]] { +(\\x{hex_digit}{2}|[^]\\])-[^]] { // A range inside a character class. 
+ // + // Matches an hex digit (i.e: \x01), or any character that is + // not ] or \, followed by -, followed by any character that is + // not ] + // // [abc0-9] // ^- matching here @@ -356,54 +361,42 @@ hex_digit [0-9a-fA-F] \\w { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= word_chars[i]; } \\W { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~word_chars[i]; } \\s { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= space_chars[i]; } \\S { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~space_chars[i]; } \\d { - char c; - - for (c = '0'; c <= '9'; c++) + for (char c = '0'; c <= '9'; c++) LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } \\D { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) { // digits 0-7 are in the sixth byte of the vector, let that byte alone if (i == 6) diff --git a/tests/test-rules.c b/tests/test-rules.c index 4846acfc09..6ec8e87d75 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -2212,6 +2212,9 @@ void test_re() assert_true_regexp("a[\\-b]", "ab", "ab"); assert_true_regexp("a]", "a]", "a]"); assert_true_regexp("a[]]b", "a]b", "a]b"); + assert_true_regexp("[a-z]-b", "c-b-c", "c-b"); // Issue #1690 + assert_true_regexp("a[]-]b", "a]b", "a]b"); + assert_true_regexp("a[]-]b", "a-b", "a-b"); assert_true_regexp("a[\\]]b", "a]b", "a]b"); assert_true_regexp("a[^bc]d", "aed", "aed"); assert_false_regexp("a[^bc]d", "abd"); From 457a03a27e70b5efd7b502895fcfe1f2dd81837e Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 25 Apr 2022 16:02:16 +0200 Subject: [PATCH 13/28] Fix regression introduced in #1692. 
--- libyara/re_lexer.c | 128 +++++++++++++++++++++++++-------------------- libyara/re_lexer.l | 24 ++++++--- tests/test-rules.c | 2 + 3 files changed, 92 insertions(+), 62 deletions(-) diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c index a7afef9a36..7b96701432 100644 --- a/libyara/re_lexer.c +++ b/libyara/re_lexer.c @@ -593,9 +593,9 @@ static const flex_int16_t yy_accept[45] = { 0, 0, 0, 0, 0, 30, 7, 7, 28, 6, 17, 7, 27, 29, 26, 18, 5, 3, 16, 15, 13, - 11, 9, 14, 12, 10, 8, 0, 0, 0, 25, - 23, 21, 24, 22, 20, 0, 4, 0, 1, 2, - 19, 0, 0, 0 + 11, 9, 14, 12, 10, 8, 0, 0, 0, 0, + 25, 23, 21, 24, 22, 20, 0, 4, 0, 1, + 2, 19, 0, 0 } ; static const YY_CHAR yy_ec[256] = @@ -632,57 +632,61 @@ static const YY_CHAR yy_ec[256] = static const YY_CHAR yy_meta[23] = { 0, - 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, - 1, 1, 1, 3, 1, 2, 2, 1, 1, 1, + 1, 2, 1, 1, 3, 4, 4, 4, 4, 1, + 1, 1, 1, 5, 1, 4, 4, 1, 1, 1, 1, 1 } ; -static const flex_int16_t yy_base[50] = +static const flex_int16_t yy_base[51] = { 0, - 0, 20, 3, 5, 37, 76, 76, 76, 10, 36, - 0, 31, 29, 47, 76, 76, 17, 23, 76, 76, - 76, 76, 76, 76, 76, 76, 4, 5, 0, 76, - 76, 76, 76, 76, 76, 0, 76, 8, 76, 76, - 76, 0, 23, 76, 67, 70, 73, 18, 6 + 0, 20, 3, 5, 49, 89, 89, 89, 10, 36, + 0, 43, 38, 47, 89, 89, 26, 33, 89, 89, + 89, 89, 89, 89, 89, 89, 4, 5, 0, 33, + 32, 31, 29, 26, 24, 23, 15, 89, 8, 89, + 89, 89, 0, 89, 67, 72, 77, 82, 84, 4 } ; -static const flex_int16_t yy_def[50] = +static const flex_int16_t yy_def[51] = { 0, 45, 45, 46, 46, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 44, 44, 47, 44, - 44, 44, 44, 44, 44, 48, 44, 44, 44, 44, - 44, 49, 44, 0, 44, 44, 44, 44, 44 + 44, 44, 44, 47, 44, 44, 44, 44, 44, 44, + 44, 44, 44, 44, 44, 44, 44, 44, 48, 44, + 44, 44, 44, 44, 44, 44, 49, 44, 44, 44, + 44, 44, 50, 0, 44, 44, 44, 44, 44, 44 } ; -static const flex_int16_t yy_nxt[99] = +static const flex_int16_t yy_nxt[112] = { 0, - 44, 7, 8, 27, 13, 28, 13, 43, 27, 38, - 28, 9, 10, 38, 8, 14, 
15, 14, 15, 42, - 11, 7, 8, 16, 17, 39, 40, 29, 18, 39, - 37, 9, 10, 29, 8, 29, 44, 44, 44, 44, - 11, 18, 44, 19, 20, 21, 22, 44, 44, 44, - 44, 23, 24, 25, 26, 30, 31, 32, 44, 44, - 44, 44, 44, 33, 34, 35, 36, 6, 6, 6, - 12, 12, 12, 41, 41, 5, 44, 44, 44, 44, + 44, 7, 8, 27, 13, 28, 13, 30, 27, 39, + 28, 9, 10, 39, 8, 14, 15, 14, 15, 29, + 11, 7, 8, 16, 17, 40, 41, 29, 29, 40, + 29, 9, 10, 29, 8, 29, 29, 29, 18, 38, + 11, 18, 29, 19, 20, 21, 22, 29, 44, 44, + 44, 23, 24, 25, 26, 31, 32, 33, 44, 44, + 44, 44, 44, 34, 35, 36, 37, 6, 6, 6, + 6, 6, 12, 12, 12, 12, 12, 30, 44, 30, + 30, 30, 42, 42, 42, 42, 43, 43, 5, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 44, 44 + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44 } ; -static const flex_int16_t yy_chk[99] = +static const flex_int16_t yy_chk[112] = { 0, - 0, 1, 1, 11, 3, 11, 4, 49, 28, 27, - 28, 1, 1, 38, 1, 3, 3, 4, 4, 48, - 1, 2, 2, 9, 9, 27, 28, 43, 18, 38, - 17, 2, 2, 13, 2, 12, 5, 0, 0, 0, - 2, 10, 0, 10, 10, 10, 10, 0, 0, 0, + 0, 1, 1, 11, 3, 11, 4, 50, 28, 27, + 28, 1, 1, 39, 1, 3, 3, 4, 4, 37, + 1, 2, 2, 9, 9, 27, 28, 36, 35, 39, + 34, 2, 2, 33, 2, 32, 31, 30, 18, 17, + 2, 10, 13, 10, 10, 10, 10, 12, 5, 0, 0, 10, 10, 10, 10, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, 14, 45, 45, 45, - 46, 46, 46, 47, 47, 44, 44, 44, 44, 44, + 45, 45, 46, 46, 46, 46, 46, 47, 0, 47, + 47, 47, 48, 48, 48, 48, 49, 49, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, - 44, 44, 44, 44, 44, 44, 44, 44 + 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, + 44 } ; /* Table of booleans, true if rule could match eol. 
*/ @@ -787,10 +791,10 @@ int read_escaped_char( yyscan_t yyscanner, uint8_t* escaped_char); -#line 790 "re_lexer.c" +#line 794 "re_lexer.c" #define YY_NO_UNISTD_H 1 -#line 793 "re_lexer.c" +#line 797 "re_lexer.c" #define INITIAL 0 #define char_class 1 @@ -1066,7 +1070,7 @@ YY_DECL #line 111 "re_lexer.l" -#line 1069 "re_lexer.c" +#line 1073 "re_lexer.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -1373,14 +1377,26 @@ YY_RULE_SETUP #line 311 "re_lexer.l" { - // A range inside a character class. + // A range inside a character class. The regexp is... + // + // ( \x{hex_digit}{2} Hex digit (i.e: \x01) ... + // | \. ...or any escaped character (i.e. \\, \-) ... + // | [^]\] ...or any character except ] and \ ... + // ) + // - ... followed by - + // [^]] ... followed by any character except ] // - // Matches an hex digit (i.e: \x01), or any character that is - // not ] or \, followed by -, followed by any character that is - // not ] + // Some examples: // // [abc0-9] - // ^- matching here + // ^-^ matching range 0-9 + // + // [a-za-] + // ^-^- matching range a-z + // + // [\.-a] + // ^--^- matching range \.-a + // uint16_t c; uint8_t start = yytext[0]; @@ -1423,7 +1439,7 @@ YY_RULE_SETUP YY_BREAK case 20: YY_RULE_SETUP -#line 362 "re_lexer.l" +#line 374 "re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1432,7 +1448,7 @@ YY_RULE_SETUP YY_BREAK case 21: YY_RULE_SETUP -#line 369 "re_lexer.l" +#line 381 "re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1441,7 +1457,7 @@ YY_RULE_SETUP YY_BREAK case 22: YY_RULE_SETUP -#line 376 "re_lexer.l" +#line 388 "re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1450,7 +1466,7 @@ YY_RULE_SETUP YY_BREAK case 23: YY_RULE_SETUP -#line 383 "re_lexer.l" +#line 395 "re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1459,7 +1475,7 @@ YY_RULE_SETUP YY_BREAK case 24: YY_RULE_SETUP -#line 390 "re_lexer.l" +#line 402 "re_lexer.l" { for (char c = '0'; c <= '9'; c++) @@ -1468,7 +1484,7 @@ YY_RULE_SETUP YY_BREAK case 25: 
YY_RULE_SETUP -#line 397 "re_lexer.l" +#line 409 "re_lexer.l" { for (int i = 0; i < 32; i++) @@ -1488,7 +1504,7 @@ YY_RULE_SETUP YY_BREAK case 26: YY_RULE_SETUP -#line 415 "re_lexer.l" +#line 427 "re_lexer.l" { uint8_t c; @@ -1506,7 +1522,7 @@ YY_RULE_SETUP YY_BREAK case 27: YY_RULE_SETUP -#line 431 "re_lexer.l" +#line 443 "re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1524,7 +1540,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(char_class): -#line 448 "re_lexer.l" +#line 460 "re_lexer.l" { // End of regexp reached while scanning a character class. @@ -1535,7 +1551,7 @@ case YY_STATE_EOF(char_class): YY_BREAK case 28: YY_RULE_SETUP -#line 457 "re_lexer.l" +#line 469 "re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1550,7 +1566,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(INITIAL): -#line 471 "re_lexer.l" +#line 483 "re_lexer.l" { yyterminate(); @@ -1558,10 +1574,10 @@ case YY_STATE_EOF(INITIAL): YY_BREAK case 29: YY_RULE_SETUP -#line 476 "re_lexer.l" +#line 488 "re_lexer.l" ECHO; YY_BREAK -#line 1564 "re_lexer.c" +#line 1580 "re_lexer.c" case YY_END_OF_BUFFER: { @@ -2710,7 +2726,7 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 476 "re_lexer.l" +#line 488 "re_lexer.l" int escaped_char_value( diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l index 42406ec089..6cdc30b645 100644 --- a/libyara/re_lexer.l +++ b/libyara/re_lexer.l @@ -308,16 +308,28 @@ hex_digit [0-9a-fA-F] -(\\x{hex_digit}{2}|[^]\\])-[^]] { +(\\x{hex_digit}{2}|\\.|[^]\\])-[^]] { - // A range inside a character class. + // A range inside a character class. The regexp is... // - // Matches an hex digit (i.e: \x01), or any character that is - // not ] or \, followed by -, followed by any character that is - // not ] + // ( \x{hex_digit}{2} Hex digit (i.e: \x01) ... + // | \. ...or any escaped character (i.e. \\, \-) ... + // | [^]\] ...or any character except ] and \ ... + // ) + // - ... followed by - + // [^]] ... 
followed by any character except ] + // + // Some examples: // // [abc0-9] - // ^- matching here + // ^-^ matching range 0-9 + // + // [a-za-] + // ^-^- matching range a-z + // + // [\.-a] + // ^--^- matching range \.-a + // uint16_t c; uint8_t start = yytext[0]; diff --git a/tests/test-rules.c b/tests/test-rules.c index 6ec8e87d75..ffb9972586 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -2215,6 +2215,8 @@ void test_re() assert_true_regexp("[a-z]-b", "c-b-c", "c-b"); // Issue #1690 assert_true_regexp("a[]-]b", "a]b", "a]b"); assert_true_regexp("a[]-]b", "a-b", "a-b"); + assert_true_regexp("[\\.-z]*", "...abc", "...abc"); + assert_true_regexp("[\\.-]*", "...abc", "..."); assert_true_regexp("a[\\]]b", "a]b", "a]b"); assert_true_regexp("a[^bc]d", "aed", "aed"); assert_false_regexp("a[^bc]d", "abd"); From 037c536fbefe58525895d30a5c9a29ff57ecdb48 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 26 Apr 2022 09:20:33 +0200 Subject: [PATCH 14/28] Fix heap overflow in ARM. In ARM the pointer returned by yr_notebook_alloc was being rounded up to a 4-byte boundary, but the size of the buffer was not increased accordingly. This means the caller of yr_notebook_alloc could receive a buffer that is 1 to 3 bytes smaller than the requested size. --- libyara/notebook.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/libyara/notebook.c b/libyara/notebook.c index 754c9ed8c9..6036865f0e 100644 --- a/libyara/notebook.c +++ b/libyara/notebook.c @@ -140,6 +140,13 @@ int yr_notebook_destroy(YR_NOTEBOOK* notebook) // void* yr_notebook_alloc(YR_NOTEBOOK* notebook, size_t size) { + // In ARM make sure the buffer's size is rounded up to a multiple of 4, + // which also implies that the returned pointers are aligned to 4 bytes. + +#if defined(__arm__) + size = (size + 3) & ~0x3; +#endif + // The requested memory size can't be larger than a notebook's page.
assert(size <= notebook->page_size); @@ -160,17 +167,6 @@ void* yr_notebook_alloc(YR_NOTEBOOK* notebook, size_t size) void* ptr = notebook->page_list_head->data + notebook->page_list_head->used; -// In ARM make sure the alignment of the returned buffer is 4 bytes. -#if defined(__arm__) - uintptr_t misalignment = (uintptr_t) ptr & 3; - - if (misalignment) - { - size += 4 - misalignment; - ptr += 4 - misalignment; - } -#endif - notebook->page_list_head->used += size; return ptr; From ea0af88d96f8eaa5d613520c39572f2541e8e0aa Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 10 May 2022 11:51:53 +0200 Subject: [PATCH 15/28] Fix memory leaks in hash module. (#1705) Fix memory leaks described in #1703. --- libyara/modules/hash/hash.c | 77 ++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/libyara/modules/hash/hash.c b/libyara/modules/hash/hash.c index caff0fc07a..003fa94d3c 100644 --- a/libyara/modules/hash/hash.c +++ b/libyara/modules/hash/hash.c @@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define MODULE_NAME hash - typedef struct _CACHE_KEY { int64_t offset; @@ -43,7 +42,6 @@ typedef struct _CACHE_KEY } CACHE_KEY; - const uint32_t crc32_tab[] = { 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, @@ -89,7 +87,6 @@ const uint32_t crc32_tab[] = { 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; - static void digest_to_ascii( unsigned char* digest, char* digest_ascii, @@ -103,7 +100,6 @@ static void digest_to_ascii( digest_ascii[digest_length * 2] = '\0'; } - static char* get_from_cache( YR_OBJECT* module_object, const char* ns, @@ -131,7 +127,6 @@ static char* get_from_cache( return result; } - static int add_to_cache( YR_OBJECT* module_object, const char* ns, @@ -166,7 +161,6 @@ static int add_to_cache( return result; } - define_function(string_md5) { unsigned char digest[YR_MD5_LEN]; @@ -192,7 +186,6 @@ define_function(string_md5) return_string(digest_ascii); } - define_function(string_sha256) { unsigned char digest[YR_SHA256_LEN]; @@ -218,7 +211,6 @@ define_function(string_sha256) return_string(digest_ascii); } - define_function(string_sha1) { unsigned char digest[YR_SHA1_LEN]; @@ -244,7 +236,6 @@ define_function(string_sha1) return_string(digest_ascii); } - define_function(string_checksum32) { size_t i; @@ -252,7 +243,7 @@ define_function(string_checksum32) SIZED_STRING* s = sized_string_argument(1); uint32_t checksum = 0; - for (i = 0; i < s->length; i++) checksum += (uint8_t)(s->c_string[i]); + for (i = 0; i < s->length; i++) checksum += (uint8_t) (s->c_string[i]); YR_DEBUG_FPRINTF( 2, @@ -265,7 +256,6 @@ define_function(string_checksum32) return_integer(checksum); } - define_function(data_md5) { yr_md5_ctx md5_context; @@ -298,11 +288,10 @@ define_function(data_md5) { YR_DEBUG_FPRINTF( 2, stderr, "} // %s() = YR_UNDEFINED // block == NULL\n", __FUNCTION__); + 
return_string(YR_UNDEFINED); } - yr_md5_init(&md5_context); - if (offset < 0 || length < 0 || offset < block->base) { YR_DEBUG_FPRINTF( @@ -310,6 +299,7 @@ define_function(data_md5) stderr, "} // %s() = YR_UNDEFINED // bad offset / length\n", __FUNCTION__); + return_string(YR_UNDEFINED); } @@ -323,9 +313,12 @@ define_function(data_md5) "} // %s() = %s (cached)\n", __FUNCTION__, cached_ascii_digest); + return_string(cached_ascii_digest); } + yr_md5_init(&md5_context); + foreach_memory_block(iterator, block) { // if desired block within current block @@ -336,9 +329,9 @@ define_function(data_md5) if (block_data != NULL) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( - length, (size_t)(block->size - data_offset)); + length, (size_t) (block->size - data_offset)); offset += data_len; length -= data_len; @@ -361,6 +354,9 @@ define_function(data_md5) stderr, "} // %s() = YR_UNDEFINED // past_first_block\n", __FUNCTION__); + + yr_md5_final(digest, &md5_context); + return_string(YR_UNDEFINED); } @@ -368,6 +364,8 @@ define_function(data_md5) break; } + yr_md5_final(digest, &md5_context); + if (!past_first_block) { YR_DEBUG_FPRINTF( @@ -375,11 +373,10 @@ define_function(data_md5) stderr, "} // %s() = YR_UNDEFINED // !past_first_block\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_md5_final(digest, &md5_context); - digest_to_ascii(digest, digest_ascii, YR_MD5_LEN); FAIL_ON_ERROR( @@ -389,7 +386,6 @@ define_function(data_md5) return_string(digest_ascii); } - define_function(data_sha1) { yr_sha1_ctx sha_context; @@ -422,11 +418,10 @@ define_function(data_sha1) { YR_DEBUG_FPRINTF( 2, stderr, "} // %s() = YR_UNDEFINED // block == NULL\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha1_init(&sha_context); - if (offset < 0 || length < 0 || offset < block->base) { YR_DEBUG_FPRINTF( @@ -434,6 +429,7 @@ define_function(data_sha1) stderr, "} // %s() = YR_UNDEFINED // 
bad offset / length\n", __FUNCTION__); + return_string(YR_UNDEFINED); } @@ -448,9 +444,12 @@ define_function(data_sha1) "} // %s() = %s (cached)\n", __FUNCTION__, cached_ascii_digest); + return_string(cached_ascii_digest); } + yr_sha1_init(&sha_context); + foreach_memory_block(iterator, block) { // if desired block within current block @@ -460,7 +459,7 @@ define_function(data_sha1) if (block_data != NULL) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( length, (size_t) block->size - data_offset); @@ -485,6 +484,8 @@ define_function(data_sha1) stderr, "} // %s() = YR_UNDEFINED // past_first_block\n", __FUNCTION__); + + yr_sha1_final(digest, &sha_context); return_string(YR_UNDEFINED); } @@ -492,6 +493,8 @@ define_function(data_sha1) break; } + yr_sha1_final(digest, &sha_context); + if (!past_first_block) { YR_DEBUG_FPRINTF( @@ -499,11 +502,10 @@ define_function(data_sha1) stderr, "} // %s() = YR_UNDEFINED // !past_first_block\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha1_final(digest, &sha_context); - digest_to_ascii(digest, digest_ascii, YR_SHA1_LEN); FAIL_ON_ERROR( @@ -513,7 +515,6 @@ define_function(data_sha1) return_string(digest_ascii); } - define_function(data_sha256) { yr_sha256_ctx sha256_context; @@ -546,11 +547,10 @@ define_function(data_sha256) { YR_DEBUG_FPRINTF( 2, stderr, "} // %s() = YR_UNDEFINED // block == NULL\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha256_init(&sha256_context); - if (offset < 0 || length < 0 || offset < block->base) { YR_DEBUG_FPRINTF( @@ -558,6 +558,7 @@ define_function(data_sha256) stderr, "} // %s() = YR_UNDEFINED // bad offset / length\n", __FUNCTION__); + return_string(YR_UNDEFINED); } @@ -572,9 +573,12 @@ define_function(data_sha256) "} // %s() = %s (cached)\n", __FUNCTION__, cached_ascii_digest); + return_string(cached_ascii_digest); } + yr_sha256_init(&sha256_context); + 
foreach_memory_block(iterator, block) { // if desired block within current block @@ -584,7 +588,7 @@ define_function(data_sha256) if (block_data != NULL) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min(length, block->size - data_offset); offset += data_len; @@ -608,6 +612,8 @@ define_function(data_sha256) stderr, "} // %s() = YR_UNDEFINED // past_first_block\n", __FUNCTION__); + + yr_sha256_final(digest, &sha256_context); return_string(YR_UNDEFINED); } @@ -615,6 +621,8 @@ define_function(data_sha256) break; } + yr_sha256_final(digest, &sha256_context); + if (!past_first_block) { YR_DEBUG_FPRINTF( @@ -622,11 +630,10 @@ define_function(data_sha256) stderr, "} // %s() = YR_UNDEFINED // !past_first_block\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha256_final(digest, &sha256_context); - digest_to_ascii(digest, digest_ascii, YR_SHA256_LEN); FAIL_ON_ERROR( @@ -636,7 +643,6 @@ define_function(data_sha256) return_string(digest_ascii); } - define_function(data_checksum32) { int64_t offset = integer_argument(1); // offset where to start @@ -673,7 +679,7 @@ define_function(data_checksum32) { size_t i; - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min(length, block->size - data_offset); offset += data_len; @@ -707,7 +713,6 @@ define_function(data_checksum32) return_integer(checksum); } - define_function(string_crc32) { size_t i; @@ -729,7 +734,6 @@ define_function(string_crc32) return_integer(checksum ^ 0xFFFFFFFF); } - define_function(data_crc32) { int64_t offset = integer_argument(1); // offset where to start @@ -766,7 +770,7 @@ define_function(data_crc32) { size_t i; - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min(length, block->size - data_offset); offset += data_len; @@ 
-803,7 +807,6 @@ define_function(data_crc32) return_integer(checksum ^ 0xFFFFFFFF); } - begin_declarations declare_function("md5", "ii", "s", data_md5); declare_function("md5", "s", "s", string_md5); @@ -821,7 +824,6 @@ begin_declarations declare_function("crc32", "s", "i", string_crc32); end_declarations - int module_initialize(YR_MODULE* module) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {}\n", __FUNCTION__); @@ -829,7 +831,6 @@ int module_initialize(YR_MODULE* module) return ERROR_SUCCESS; } - int module_finalize(YR_MODULE* module) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {}\n", __FUNCTION__); @@ -837,7 +838,6 @@ int module_finalize(YR_MODULE* module) return ERROR_SUCCESS; } - int module_load( YR_SCAN_CONTEXT* context, YR_OBJECT* module_object, @@ -855,7 +855,6 @@ int module_load( return ERROR_SUCCESS; } - int module_unload(YR_OBJECT* module_object) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {}\n", __FUNCTION__); From 32faec66257c6c3bccecae8dfe19b37e4f73575c Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Tue, 10 May 2022 16:23:28 +0200 Subject: [PATCH 16/28] Bump version number to 4.2.2 --- libyara/include/yara/libyara.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libyara/include/yara/libyara.h b/libyara/include/yara/libyara.h index e7b7323af2..c87fc5b15d 100644 --- a/libyara/include/yara/libyara.h +++ b/libyara/include/yara/libyara.h @@ -34,7 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define YR_MAJOR_VERSION 4 #define YR_MINOR_VERSION 2 -#define YR_MICRO_VERSION 0 +#define YR_MICRO_VERSION 2 #define version_str(s) _version_str(s) #define _version_str(s) #s From dc523c0b91a71320faa981cc9d4c290feac1fc85 Mon Sep 17 00:00:00 2001 From: MatejKastak Date: Sat, 28 May 2022 19:47:57 +0200 Subject: [PATCH 17/28] Magic tests fix (#1714) * test: Relax pe32 condition in magic tests Turns out different versions of libmagic return different file identification for the PE32 sample in magic tests. 
Relax this condition to the common part of the identification to make tests less reliant on the testing platform. * test: Relax macho condition in magic tests --- tests/test-magic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test-magic.c b/tests/test-magic.c index adb0971e70..ff2f34c868 100644 --- a/tests/test-magic.c +++ b/tests/test-magic.c @@ -23,7 +23,7 @@ int main(int argc, char** argv) assert_true_rule_blob( "import \"magic\" rule test { condition: \ - magic.type() contains \"PE32\" and \ + magic.type() contains \"MS-DOS executable\" and \ magic.mime_type() == \"application/x-dosexec\" }", PE32_FILE); @@ -31,7 +31,7 @@ int main(int argc, char** argv) assert_true_rule_blob( "import \"magic\" rule test { condition: \ magic.type() contains \"Mach-O\" and \ - magic.mime_type() == \"application/x-mach-binary\" and \ + (magic.mime_type() == \"application/x-mach-binary\" or magic.mime_type() == \"application/octet-stream\") and \ magic.type() contains \"Mach-O\"}", MACHO_X86_FILE); From ee654865b90e7dd85e2df0506290f4362cdb91b3 Mon Sep 17 00:00:00 2001 From: dangodangodango <39194518+dangodangodango@users.noreply.github.com> Date: Sun, 29 May 2022 04:02:21 +0800 Subject: [PATCH 18/28] Fix issue #1708 (#1710) * Fix issue #1708 * Add test case for #1708 Build a dotnet pe that triggers this issue: https://github.com/dangodangodango/BadDotnetPe --- libyara/modules/dotnet/dotnet.c | 7 ++++--- tests/BUILD.bazel | 1 + tests/data/bad_dotnet_pe | Bin 0 -> 4096 bytes tests/test-dotnet.c | 9 +++++++++ 4 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 tests/data/bad_dotnet_pe diff --git a/libyara/modules/dotnet/dotnet.c b/libyara/modules/dotnet/dotnet.c index 112e6fb092..8e1b7a2f56 100644 --- a/libyara/modules/dotnet/dotnet.c +++ b/libyara/modules/dotnet/dotnet.c @@ -1637,6 +1637,7 @@ static bool dotnet_is_dotnet(PE* pe) int64_t metadata_root = pe_rva_to_offset( pe, yr_le32toh(cli_header->MetaData.VirtualAddress)); + offset = 
metadata_root; if (!struct_fits_in_pe(pe, pe->data + metadata_root, NET_METADATA)) return false; @@ -1650,7 +1651,7 @@ static bool dotnet_is_dotnet(PE* pe) // Also make sure it fits in pe. uint32_t md_len = yr_le32toh(metadata->Length); if (md_len == 0 || md_len > 255 || md_len % 4 != 0 || - !fits_in_pe(pe, pe->data + offset, md_len)) + !fits_in_pe(pe, pe->data + offset + sizeof(NET_METADATA), md_len)) { return false; } @@ -1667,7 +1668,7 @@ static bool dotnet_is_dotnet(PE* pe) int64_t entry_offset = pe_rva_to_offset( pe, yr_le32toh(pe->header->OptionalHeader.AddressOfEntryPoint)); - if (offset < 0 || !fits_in_pe(pe, pe->data + entry_offset, 2)) + if (entry_offset < 0 || !fits_in_pe(pe, pe->data + entry_offset, 2)) return false; const uint8_t* entry_data = pe->data + entry_offset; @@ -1721,7 +1722,7 @@ void dotnet_parse_com(PE* pe) md_len = yr_le32toh(metadata->Length); if (md_len == 0 || md_len > 255 || md_len % 4 != 0 || - !fits_in_pe(pe, pe->data + offset, md_len)) + !fits_in_pe(pe, pe->data + offset + sizeof(NET_METADATA), md_len)) { return; } diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index ee573fbba6..86b688dae4 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -241,6 +241,7 @@ cc_test( data = [ "data/tiny", "data/0ca09bde7602769120fadc4f7a4147347a7a97271370583586c9e587fd396171", + "data/bad_dotnet_pe", ], linkstatic = True, deps = [ diff --git a/tests/data/bad_dotnet_pe b/tests/data/bad_dotnet_pe new file mode 100644 index 0000000000000000000000000000000000000000..b4e07c1c6b5a0006684da164debb334440d7768f GIT binary patch literal 4096 zcmeHKU2GIp6h60qlm+?|u&HQppdbaC$<~%CsAad^mR9;_cWF^e+S%>xGB`W4nVE&w z2ke6y9#s0|qX~&of+mt^A|cTSW1=r4K4@Z85+5Z#KunCpsQ8^bv)yeg7+-uKob5e- z_uO;OIdkXS>F{$)R7pft7`JW_T>@o@DST(Ni0QhAF4xgV>#uCTB!;eRPaQWsE$=#6 z*T`uZ!?qn?OH0iy*qUi;iP5B%b7rN!X;Wlp=z6S=Xh>Aj56QQ07I*uRc4!+!l&BV( znm~K{VNeYthC$Sz_=+HIR&e|Z7J;E>sHB-!*p&ZDG!$6{XoKHTW@1E9F2rh>A=(IC 
zY8S@pxjV^P5Ps357V#npMHe??&A*}-~gBVdynW)$qt}B3JtDtJSVG!NG9ztpVJJMaO%(repkbAdPw4!v8 zy*y(Rvns7ai0T9VMyxL`a;?}~c*4pTMc(=;@#;wJ1Q&FA1(Aog>JrxCb@?gx(09^eM7WV~PDxWa=9Kcn!o3Y&D0 z?jr+y4e7Ljb^~vwDDVR~>pFHj3j7GY0o*}v0Y5?S0r%qV*Xgj*CxNRLgZj`ippeBE zQt7tBD+3?mB5bAYMYh*6Jg=Re9Cl_4mh7gvf}NoOX<5#s<65(nT=0CE(|aA;b1YdD z#|yS^<|IwJrZ0y~TT()%3)!rZwq)GLu}BwuNpa7UxwN&AGW~LO%5byN???ZUryTc% zwf6m{B`2ionU1}7a?qZ0+??TK(y(r8g?Q)8Y{6A_B`3X3E^k^&IW8?@UU6P&z8LYC zi*jWAwG-^Xuop@bVI`E_H`AtN`en0W^+F>(3>Pm=S&8*o%L?ndTwXb6$qc7Rdd7F$ zqCR07S=;e^GvgIiMb|-Q(oIVDq?wUk_&~4~o-D-C^a*$*YBwo|6J(Kzcg4gBv}qpH z0nJhj!@#_Wd7U!Q=5S8VF8zGr%Wtm@FFy9i`@gRL(5EU*h}uexL^YVkMi%R;2&ner z!ReP8zlj_`HxRW#HB6#y{8FVVnkHQ%KjPSZ^BI}v+)~F~=ak2G=mvm&f|~V_zErQ{ z%6LBC7FO%2lU;fg4(hfnADM*dW9-JLMh~tIngpZ()Rk2fK*y7bWb-@G z_{PsZ=>NFs$n~px)^)Rw-UCyoys25ox1~Q-3bmYd(kG_Epr>Gkgv)02{A`+%1Mx@q zby4Vl;MAS^QxN^hb_Qo+^qh~w*b~qX%`Z{>kSOuPM>{}sFqe3}^odN2^w=T4bhrA;WS33L+|ez>1V__S#b zJ%wSXT9v{qcOVb*hU!P$hXS}*)d{BpG5N4ERexIRsX^I9amNL}PD-o$t_ydnz-`GL zEU)%hKt|l#&=mj!pcZ}(O=7s~qobd$KyYLY7 zSKgoT2K*Z={UTS zY61OfpV3gy)nw~po-75G`M3Rox{kq*gD489jb9e+?duxF_lKaB>sD8%R2!nlRUPA~ rorm0Vc&Zk%(r`{!R Date: Wed, 1 Jun 2022 09:43:33 +0200 Subject: [PATCH 19/28] Fix issue #1719. --- libyara/proc/linux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index 7f1536712b..4de4529d6b 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -406,7 +406,7 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( stderr, "- %s() {} = %p // .base=0x%" PRIx64 " .size=%" PRIu64 "\n", __FUNCTION__, - result, + context->current_block, context->current_block.base, context->current_block.size); From 9560b200ce5ebc7a5b03314b27d1641228bc3e1a Mon Sep 17 00:00:00 2001 From: "Victor M. 
Alvarez" Date: Thu, 30 Jun 2022 10:41:45 +0200 Subject: [PATCH 20/28] Fix null dereferences while loading compiled rules (#1727) * Fix null dereferences while loading compiled rules * Fix null dereference in yr_object_create * Fix assert to explicitly catch null identifier in yr_object_create --- libyara/arena.c | 6 +++--- libyara/object.c | 1 + libyara/rules.c | 3 +++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libyara/arena.c b/libyara/arena.c index 04d0006b68..784d9ad972 100644 --- a/libyara/arena.c +++ b/libyara/arena.c @@ -434,8 +434,8 @@ int yr_arena_ptr_to_ref(YR_ARENA* arena, const void* address, YR_ARENA_REF* ref) (uint8_t*) address < arena->buffers[i].data + arena->buffers[i].used) { ref->buffer_id = i; - ref->offset = (yr_arena_off_t)( - (uint8_t*) address - arena->buffers[i].data); + ref->offset = + (yr_arena_off_t) ((uint8_t*) address - arena->buffers[i].data); return 1; } @@ -583,7 +583,7 @@ int yr_arena_load_stream(YR_STREAM* stream, YR_ARENA** arena) YR_ARENA_BUFFER* b = &new_arena->buffers[ref.buffer_id]; if (ref.buffer_id >= new_arena->num_buffers || - ref.offset > b->used - sizeof(void*)) + ref.offset > b->used - sizeof(void*) || b->data == NULL) { yr_arena_release(new_arena); return ERROR_CORRUPT_FILE; diff --git a/libyara/object.c b/libyara/object.c index b0a656a7c1..359feabb60 100644 --- a/libyara/object.c +++ b/libyara/object.c @@ -57,6 +57,7 @@ int yr_object_create( size_t object_size = 0; assert(parent != NULL || object != NULL); + assert(identifier != NULL); switch (type) { diff --git a/libyara/rules.c b/libyara/rules.c index df7f15077c..9cd1a2ebae 100644 --- a/libyara/rules.c +++ b/libyara/rules.c @@ -333,6 +333,9 @@ int yr_rules_from_arena(YR_ARENA* arena, YR_RULES** rules) YR_SUMMARY* summary = (YR_SUMMARY*) yr_arena_get_ptr( arena, YR_SUMMARY_SECTION, 0); + if (summary == NULL) + return ERROR_CORRUPT_FILE; + // Now YR_RULES relies on this arena, let's increment the arena's // reference count so that if the original owner
of the arena calls // yr_arena_destroy the arena is not destroyed. From da831c29b7ed874d15d6b15374a280936ed8a7e5 Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Wed, 15 Jun 2022 03:57:48 -0400 Subject: [PATCH 21/28] Dex fixes (#1728) * Fix compiler warnings with dex debug mode. * Fix crashes in dex module. This commit fixes a few crashes in the dex module. There are actually three of them: The first is incorrect usage of "struct_fits_in_dex" caused by passing "sizeof(code_item_t)" instead of just "code_item_t" as the third argument. In the test case the pointer for code_item started in the bounds of the dex but only the first 8 bytes were within bounds, and since "sizeof(sizeof(code_item_t))" is less than 8 the check was passing. The fix here is to pass just the struct type as the third argument. The second crash was an off-by-one error when parsing a string. The check ensured the string fits in the dex but was not including an extra byte which was copied in the call to set_sized_string. Just like before, this was a case of a string falling right on the end of a dex file. The third crash was due to a missing "struct_fits_in_dex" check. We ended up with a pointer to a map_item_t which was off the ends of the dex bounds. With this commit all the test cases provided in the report are now passing. I did a quick sweep of the module to make sure there were no other cases where we were incorrectly using "struct_fits_in_dex" and didn't find any. These were all documented at a private report via huntr.dev (https://huntr.dev/bounties/007a7784-c211-4847-9cc3-aec38e7d5157/) Found by @sudhackar. Fixes #1726. 
--- libyara/modules/dex/dex.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/libyara/modules/dex/dex.c b/libyara/modules/dex/dex.c index 6c9fd499d5..daa38363a3 100644 --- a/libyara/modules/dex/dex.c +++ b/libyara/modules/dex/dex.c @@ -619,7 +619,7 @@ uint32_t load_encoded_field( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tFIELD_NAME %s NAME_IDX 0x%x\n", field_name->c_string, name_idx); + "[DEX]\tFIELD_NAME %s NAME_IDX 0x%llx\n", field_name->c_string, name_idx); #endif set_sized_string( @@ -643,7 +643,7 @@ uint32_t load_encoded_field( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%x DESCRIPTOR_IDX 0x%x\n", + "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%llx DESCRIPTOR_IDX 0x%llx\n", class_name->c_string, class_idx, descriptor_idx); @@ -748,7 +748,7 @@ uint32_t load_encoded_method( return 0; #ifdef DEBUG_DEX_MODULE - printf("[DEX]\tNAME_IDX 0x%x\n", name_idx); + printf("[DEX]\tNAME_IDX 0x%llx\n", name_idx); #endif #ifdef DEBUG_DEX_MODULE @@ -768,7 +768,7 @@ uint32_t load_encoded_method( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tMETHOD_NAME %s NAME_IDX 0x%x\n", + "[DEX]\tMETHOD_NAME %s NAME_IDX 0x%llx\n", method_name->c_string, name_idx); #endif @@ -794,7 +794,7 @@ uint32_t load_encoded_method( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%x DESCRIPTOR_IDX:0x%x\n", + "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%llx DESCRIPTOR_IDX:0x%llx\n", class_name->c_string, class_idx, descriptor_idx); @@ -821,7 +821,7 @@ uint32_t load_encoded_method( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tPROTO_NAME %s CLASS_IDX 0x%x DESCRIPTOR_IDX:0x%x\n", + "[DEX]\tPROTO_NAME %s CLASS_IDX 0x%llx DESCRIPTOR_IDX:0x%llx\n", proto_name->c_string, class_idx, descriptor_idx); @@ -842,7 +842,7 @@ uint32_t load_encoded_method( #endif if (struct_fits_in_dex( - dex, dex->data + encoded_method.code_off, sizeof(code_item_t))) + dex, dex->data + encoded_method.code_off, code_item_t)) { code_item_t* code_item = (code_item_t*) 
(dex->data + encoded_method.code_off); @@ -954,7 +954,7 @@ void dex_parse(DEX* dex, uint64_t base_address) if (!fits_in_dex( dex, - dex->data + yr_le32toh(string_id_item->string_data_offset), + dex->data + yr_le32toh(string_id_item->string_data_offset) + 1, value)) continue; @@ -967,8 +967,8 @@ void dex_parse(DEX* dex, uint64_t base_address) set_integer(value, dex->object, "string_ids[%i].size", i); set_sized_string( - (const char*) (( - dex->data + yr_le32toh(string_id_item->string_data_offset) + 1)), + (const char*) ( + dex->data + yr_le32toh(string_id_item->string_data_offset) + 1), value, dex->object, "string_ids[%i].value", @@ -1124,6 +1124,9 @@ void dex_parse(DEX* dex, uint64_t base_address) map_item_t* map_item = (map_item_t*) (dex->data + yr_le32toh(dex_header->map_offset) + sizeof(uint32_t) + i * sizeof(map_item_t)); + if (!struct_fits_in_dex(dex, map_item, map_item_t)) + return; + set_integer( yr_le16toh(map_item->type), dex->object, From 9b4372f8753fd3ba8b65bbbfde506df24d84afd0 Mon Sep 17 00:00:00 2001 From: Wesley Shields Date: Thu, 16 Jun 2022 11:41:18 -0400 Subject: [PATCH 22/28] Fix negative indexing in dex module. (#1730) * Fix negative indexing in dex module. When attempting to call dex_get_integer() or dex_get_string() with a negative index we would eventually land in the assert() at https://github.com/VirusTotal/yara/blob/master/libyara/object.c#L497 failing. Instead of doing that let's check for negative values before going any further, which will at least allow the module to continue processing. * YR_UNDEFINED is < 0 already. Simplify the logic when checking for negative index. * Revert "YR_UNDEFINED is < 0 already. Simplify the logic when checking for negative index." This reverts commit 38af38fb8db54dc5fa8be1f28d5fc7648388822c. 
--- libyara/modules/dex/dex.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libyara/modules/dex/dex.c b/libyara/modules/dex/dex.c index daa38363a3..1fbec8278e 100644 --- a/libyara/modules/dex/dex.c +++ b/libyara/modules/dex/dex.c @@ -418,7 +418,7 @@ static int64_t dex_get_integer( const char* pattern, int64_t index) { - if (index == YR_UNDEFINED) + if (index == YR_UNDEFINED || index < 0) return YR_UNDEFINED; // Impose a reasonably large limit to table indexes. @@ -434,7 +434,7 @@ static SIZED_STRING* dex_get_string( const char* pattern, int64_t index) { - if (index == YR_UNDEFINED) + if (index == YR_UNDEFINED || index < 0) return NULL; // Impose a reasonably large limit to table indexes. From d5a7565a8bd4f312dc75ac6cb064350ee9bd380e Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 30 Jun 2022 11:06:13 +0200 Subject: [PATCH 23/28] Bump version number. --- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 8df0c8f0c1..64356b4531 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([yara], [4.2.0], [vmalvarez@virustotal.com]) +AC_INIT([yara], [4.2.2], [vmalvarez@virustotal.com]) AM_SILENT_RULES([yes]) AC_CONFIG_SRCDIR([cli/yara.c]) From 731099225c4a5d5c6a35e3fb854928bc0b30fe6b Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Fri, 29 Jul 2022 15:30:43 +0200 Subject: [PATCH 24/28] Add "includes" argument to "yara_library" Bazel rule. 
--- bazel/yara.bzl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bazel/yara.bzl b/bazel/yara.bzl index 51c0729b1e..09e1512069 100644 --- a/bazel/yara.bzl +++ b/bazel/yara.bzl @@ -61,6 +61,7 @@ module_list = rule( def yara_library( name, defines = [], + includes = [], modules = [], modules_srcs = [], deps = [], @@ -195,7 +196,7 @@ def yara_library( "libyara/include/yara/rules.h", ], copts = copts, - includes = [ + includes = includes + [ "libyara/modules", "libyara/include", "libyara", From 7a99e6dd1212e3dc7e170d5b229703e796d7a314 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Thu, 4 Aug 2022 13:46:03 +0200 Subject: [PATCH 25/28] Fix issue #1757. --- libyara/exec.c | 45 +++++- libyara/grammar.c | 355 +++++++++++++++++++++++++++------------------ libyara/grammar.y | 19 +++ tests/test-rules.c | 22 +++ 4 files changed, 298 insertions(+), 143 deletions(-) diff --git a/libyara/exec.c b/libyara/exec.c index 1a9b2c1c04..e0865e5a33 100644 --- a/libyara/exec.c +++ b/libyara/exec.c @@ -1388,10 +1388,24 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) { YR_DEBUG_FPRINTF(2, stderr, "- case OP_OF: // %s()\n", __FUNCTION__); + // Quantifier is "all" if (is_undef(r2)) + { r1.i = found >= count ? 1 : 0; + } + // Quantifier is 0 or none. This is a special case in which we want + // exactly 0 strings matching. More information at: + // https://github.com/VirusTotal/yara/issues/1695 + else if (r2.i == 0) + { + r1.i = found == 0 ? 1 : 0; + } + // In all other cases the number of strings matching should be at + // least the amount specified by the quantifier. else + { r1.i = found >= r2.i ? 
1 : 0; + } } else // OP_OF_PERCENT { @@ -1416,8 +1430,10 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) found = 0; count = 0; - pop(r2); - pop(r1); + + pop(r2); // Offset range end + pop(r1); // Offset range start + ensure_defined(r1); ensure_defined(r2); @@ -1432,6 +1448,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) while (match != NULL) { + // String match within range start and range end? if (match->base + match->offset >= r1.i && match->base + match->offset <= r2.i) { @@ -1439,6 +1456,9 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) break; } + // If current match is past range end, we can stop as matches + // are sorted by offset in increasing order, so all remaining + // matches are past the range end too. if (match->base + match->offset > r1.i) break; @@ -1449,11 +1469,26 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r3); } - pop(r1); - if (is_undef(r1)) + pop(r2); // Quantifier X in expressions like "X of string_set in range" + + // Quantifier is "all". + if (is_undef(r2)) + { r1.i = found >= count ? 1 : 0; + } + // Quantifier is 0 or none. This is a special case in which we want + // exactly 0 strings matching. More information at: + // https://github.com/VirusTotal/yara/issues/1695 + else if (r2.i == 0) + { + r1.i = found == 0 ? 1 : 0; + } + // In all other cases the number of strings matching should be at least + // the amount specified by the quantifier. else - r1.i = found >= r1.i ? 1 : 0; + { + r1.i = found >= r2.i ? 1 : 0; + } push(r1); break; diff --git a/libyara/grammar.c b/libyara/grammar.c index 5ff57ff7d0..deed72c4dd 100644 --- a/libyara/grammar.c +++ b/libyara/grammar.c @@ -921,23 +921,23 @@ static const yytype_int8 yytranslate[] = /* YYRLINE[YYN] -- Source line where rule number YYN was defined.
*/ static const yytype_int16 yyrline[] = { - 0, 326, 326, 327, 328, 329, 330, 331, 332, 340, - 353, 358, 352, 385, 388, 404, 407, 422, 427, 428, - 433, 434, 440, 443, 459, 468, 510, 511, 516, 533, - 547, 561, 575, 593, 594, 600, 599, 616, 615, 636, - 635, 660, 666, 726, 727, 728, 729, 730, 731, 737, - 758, 789, 794, 811, 816, 836, 837, 851, 852, 853, - 854, 855, 859, 860, 874, 878, 973, 1021, 1082, 1129, - 1130, 1134, 1169, 1222, 1264, 1287, 1293, 1299, 1311, 1321, - 1331, 1341, 1351, 1361, 1371, 1381, 1395, 1410, 1421, 1498, - 1536, 1438, 1695, 1694, 1784, 1790, 1796, 1816, 1836, 1842, - 1848, 1854, 1853, 1899, 1898, 1942, 1949, 1956, 1963, 1970, - 1977, 1984, 1988, 1996, 2016, 2044, 2118, 2146, 2154, 2163, - 2187, 2202, 2222, 2221, 2227, 2238, 2239, 2244, 2251, 2263, - 2262, 2272, 2273, 2278, 2309, 2331, 2335, 2340, 2345, 2354, - 2358, 2366, 2378, 2392, 2399, 2406, 2431, 2443, 2455, 2467, - 2482, 2494, 2509, 2552, 2573, 2608, 2643, 2677, 2702, 2719, - 2729, 2739, 2749, 2759, 2779, 2799 + 0, 360, 360, 361, 362, 363, 364, 365, 366, 374, + 387, 392, 386, 419, 422, 438, 441, 456, 461, 462, + 467, 468, 474, 477, 493, 502, 544, 545, 550, 567, + 581, 595, 609, 627, 628, 634, 633, 650, 649, 670, + 669, 694, 700, 760, 761, 762, 763, 764, 765, 771, + 792, 823, 828, 845, 850, 870, 871, 885, 886, 887, + 888, 889, 893, 894, 908, 912, 1007, 1055, 1116, 1161, + 1162, 1166, 1201, 1254, 1296, 1319, 1325, 1331, 1343, 1353, + 1363, 1373, 1383, 1393, 1403, 1413, 1427, 1442, 1453, 1528, + 1566, 1470, 1694, 1705, 1716, 1735, 1754, 1766, 1772, 1778, + 1777, 1823, 1822, 1866, 1873, 1880, 1887, 1894, 1901, 1908, + 1912, 1920, 1921, 1946, 1966, 1994, 2068, 2096, 2104, 2113, + 2156, 2171, 2190, 2200, 2199, 2208, 2222, 2223, 2228, 2238, + 2253, 2252, 2265, 2266, 2271, 2304, 2329, 2377, 2384, 2390, + 2396, 2406, 2410, 2418, 2430, 2444, 2451, 2458, 2483, 2495, + 2507, 2519, 2534, 2546, 2561, 2604, 2625, 2660, 2695, 2729, + 2754, 2771, 2781, 2791, 2801, 2811, 2831, 2851 }; #endif @@ -3975,13 
+3975,32 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); result = ERROR_WRONG_TYPE; } + // If we can statically determine lower and upper bounds, ensure + // lower < upper. Check for upper bound here because some things (like + // string count) are EXPRESSION_TYPE_INTEGER. + if ((yyvsp[-3].expression).value.integer != YR_UNDEFINED && (yyvsp[-1].expression).value.integer != YR_UNDEFINED) + { + if ((yyvsp[-3].expression).value.integer > (yyvsp[-1].expression).value.integer) + { + yr_compiler_set_error_extra_info( + compiler, "range lower bound must be less than upper bound"); + result = ERROR_INVALID_VALUE; + } + else if ((yyvsp[-3].expression).value.integer < 0) + { + yr_compiler_set_error_extra_info( + compiler, "range lower bound can not be negative"); + result = ERROR_INVALID_VALUE; + } + } + fail_if_error(result); } -#line 3981 "grammar.c" +#line 4209 "grammar.c" break; case 120: /* integer_enumeration: primary_expression */ -#line 2188 "grammar.y" +#line 2157 "grammar.y" { int result = ERROR_SUCCESS; @@ -3996,11 +4015,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = 1; } -#line 4000 "grammar.c" +#line 4228 "grammar.c" break; case 121: /* integer_enumeration: integer_enumeration ',' primary_expression */ -#line 2203 "grammar.y" +#line 2172 "grammar.y" { int result = ERROR_SUCCESS; @@ -4015,62 +4034,112 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = (yyvsp[-2].integer) + 1; } -#line 4019 "grammar.c" +#line 4247 "grammar.c" break; - case 122: /* $@11: %empty */ -#line 2222 "grammar.y" + case 122: /* string_iterator: string_set */ +#line 2191 "grammar.y" + { + fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); + fail_if_error(yr_parser_emit(yyscanner, OP_ITER_START_STRING_SET, + NULL)); + } +#line 4257 "grammar.c" + break; + + case 123: /* $@10: %empty */ +#line 2200 "grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4028 "grammar.c" 
+#line 4266 "grammar.c" break; - case 124: /* string_set: "" */ -#line 2228 "grammar.y" + case 124: /* string_set: '(' $@10 string_enumeration ')' */ +#line 2205 "grammar.y" + { + (yyval.integer) = (yyvsp[-1].integer); + } +#line 4274 "grammar.c" + break; + + case 125: /* string_set: "" */ +#line 2209 "grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, YR_UNDEFINED)); fail_if_error(yr_parser_emit_pushes_for_strings( yyscanner, "$*")); } -#line 4039 "grammar.c" +#line 4288 "grammar.c" break; - case 127: /* string_enumeration_item: "string identifier" */ -#line 2245 "grammar.y" + case 126: /* string_enumeration: string_enumeration_item */ +#line 2222 "grammar.y" + { (yyval.integer) = (yyvsp[0].integer); } +#line 4294 "grammar.c" + break; + + case 127: /* string_enumeration: string_enumeration ',' string_enumeration_item */ +#line 2223 "grammar.y" + { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } +#line 4300 "grammar.c" + break; + + case 128: /* string_enumeration_item: "string identifier" */ +#line 2229 "grammar.y" { int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string)); yr_free((yyvsp[0].c_string)); fail_if_error(result); } -#line 4050 "grammar.c" +#line 4314 "grammar.c" break; - case 128: /* string_enumeration_item: "string identifier with wildcard" */ -#line 2252 "grammar.y" + case 129: /* string_enumeration_item: "string identifier with wildcard" */ +#line 2239 "grammar.y" { int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string)); yr_free((yyvsp[0].c_string)); fail_if_error(result); } -#line 4061 "grammar.c" +#line 4328 "grammar.c" break; - case 129: /* $@12: %empty */ -#line 2263 "grammar.y" + case 130: /* $@11: %empty */ +#line 2253 "grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4070 "grammar.c" +#line 4337 "grammar.c" + break; + + case 131: /* rule_set: '(' $@11 rule_enumeration ')' */ +#line 2258 "grammar.y" + { + (yyval.integer) 
= (yyvsp[-1].integer); + } +#line 4345 "grammar.c" + break; + + case 132: /* rule_enumeration: rule_enumeration_item */ +#line 2265 "grammar.y" + { (yyval.integer) = (yyvsp[0].integer); } +#line 4351 "grammar.c" break; - case 133: /* rule_enumeration_item: "identifier" */ -#line 2279 "grammar.y" + case 133: /* rule_enumeration: rule_enumeration ',' rule_enumeration_item */ +#line 2266 "grammar.y" + { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } +#line 4357 "grammar.c" + break; + + case 134: /* rule_enumeration_item: "identifier" */ +#line 2272 "grammar.y" { int result = ERROR_SUCCESS; @@ -4101,11 +4170,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4105 "grammar.c" +#line 4394 "grammar.c" break; - case 134: /* rule_enumeration_item: "identifier" '*' */ -#line 2310 "grammar.y" + case 135: /* rule_enumeration_item: "identifier" '*' */ +#line 2305 "grammar.y" { YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( compiler->arena, @@ -4123,54 +4192,64 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4127 "grammar.c" +#line 4419 "grammar.c" break; - case 135: /* for_expression: primary_expression */ -#line 2332 "grammar.y" + case 136: /* for_expression: primary_expression */ +#line 2330 "grammar.y" { (yyval.integer) = FOR_EXPRESSION_ANY; } -#line 4135 "grammar.c" +#line 4471 "grammar.c" break; - case 136: /* for_expression: "" */ -#line 2336 "grammar.y" + case 137: /* for_expression: for_quantifier */ +#line 2378 "grammar.y" { yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); (yyval.integer) = FOR_EXPRESSION_ALL; } -#line 4144 "grammar.c" +#line 4479 "grammar.c" + break; + + case 138: /* for_quantifier: "" */ +#line 2385 "grammar.y" + { + yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); + (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; + (yyval.expression).value.integer = FOR_EXPRESSION_ALL; + } +#line 4489 "grammar.c" break; - case 137: /* for_expression: "" */ 
-#line 2341 "grammar.y" + case 139: /* for_quantifier: "" */ +#line 2391 "grammar.y" { yr_parser_emit_push_const(yyscanner, 1); (yyval.integer) = FOR_EXPRESSION_ANY; } -#line 4153 "grammar.c" +#line 4499 "grammar.c" break; - case 138: /* for_expression: "" */ -#line 2346 "grammar.y" + case 140: /* for_quantifier: "" */ +#line 2397 "grammar.y" { yr_parser_emit_push_const(yyscanner, 0); (yyval.integer) = FOR_EXPRESSION_NONE; } -#line 4162 "grammar.c" +#line 4509 "grammar.c" break; - case 139: /* primary_expression: '(' primary_expression ')' */ -#line 2355 "grammar.y" + case 141: /* primary_expression: '(' primary_expression ')' */ +#line 2407 "grammar.y" { (yyval.expression) = (yyvsp[-1].expression); } -#line 4170 "grammar.c" +#line 4517 "grammar.c" break; - case 140: /* primary_expression: "" */ -#line 2359 "grammar.y" + case 142: /* primary_expression: "" */ +#line 2411 "grammar.y" { fail_if_error(yr_parser_emit( yyscanner, OP_FILESIZE, NULL)); @@ -4178,11 +4257,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4182 "grammar.c" +#line 4529 "grammar.c" break; - case 141: /* primary_expression: "" */ -#line 2367 "grammar.y" + case 143: /* primary_expression: "" */ +#line 2419 "grammar.y" { yywarning(yyscanner, "Using deprecated \"entrypoint\" keyword. 
Use the \"entry_point\" " @@ -4194,11 +4273,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4198 "grammar.c" +#line 4545 "grammar.c" break; - case 142: /* primary_expression: "integer function" '(' primary_expression ')' */ -#line 2379 "grammar.y" + case 144: /* primary_expression: "integer function" '(' primary_expression ')' */ +#line 2431 "grammar.y" { check_type((yyvsp[-1].expression), EXPRESSION_TYPE_INTEGER, "intXXXX or uintXXXX"); @@ -4212,33 +4291,33 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4216 "grammar.c" +#line 4563 "grammar.c" break; - case 143: /* primary_expression: "integer number" */ -#line 2393 "grammar.y" + case 145: /* primary_expression: "integer number" */ +#line 2445 "grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = (yyvsp[0].integer); } -#line 4227 "grammar.c" +#line 4574 "grammar.c" break; - case 144: /* primary_expression: "floating point number" */ -#line 2400 "grammar.y" + case 146: /* primary_expression: "floating point number" */ +#line 2452 "grammar.y" { fail_if_error(yr_parser_emit_with_arg_double( yyscanner, OP_PUSH, (yyvsp[0].double_), NULL, NULL)); (yyval.expression).type = EXPRESSION_TYPE_FLOAT; } -#line 4238 "grammar.c" +#line 4585 "grammar.c" break; - case 145: /* primary_expression: "text string" */ -#line 2407 "grammar.y" + case 147: /* primary_expression: "text string" */ +#line 2459 "grammar.y" { YR_ARENA_REF ref; @@ -4263,11 +4342,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_STRING; (yyval.expression).value.sized_string_ref = ref; } -#line 4267 "grammar.c" +#line 4614 "grammar.c" break; - case 146: /* 
primary_expression: "string count" "" range */ -#line 2432 "grammar.y" + case 148: /* primary_expression: "string count" "" range */ +#line 2484 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-2].c_string), OP_COUNT_IN, YR_UNDEFINED); @@ -4279,11 +4358,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4283 "grammar.c" +#line 4630 "grammar.c" break; - case 147: /* primary_expression: "string count" */ -#line 2444 "grammar.y" + case 149: /* primary_expression: "string count" */ +#line 2496 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[0].c_string), OP_COUNT, YR_UNDEFINED); @@ -4295,11 +4374,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4299 "grammar.c" +#line 4646 "grammar.c" break; - case 148: /* primary_expression: "string offset" '[' primary_expression ']' */ -#line 2456 "grammar.y" + case 150: /* primary_expression: "string offset" '[' primary_expression ']' */ +#line 2508 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_OFFSET, YR_UNDEFINED); @@ -4311,11 +4390,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4315 "grammar.c" +#line 4662 "grammar.c" break; - case 149: /* primary_expression: "string offset" */ -#line 2468 "grammar.y" + case 151: /* primary_expression: "string offset" */ +#line 2520 "grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4330,11 +4409,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4334 "grammar.c" +#line 4681 "grammar.c" break; - case 
150: /* primary_expression: "string length" '[' primary_expression ']' */ -#line 2483 "grammar.y" + case 152: /* primary_expression: "string length" '[' primary_expression ']' */ +#line 2535 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_LENGTH, YR_UNDEFINED); @@ -4346,11 +4425,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4350 "grammar.c" +#line 4697 "grammar.c" break; - case 151: /* primary_expression: "string length" */ -#line 2495 "grammar.y" + case 153: /* primary_expression: "string length" */ +#line 2547 "grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4365,11 +4444,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4369 "grammar.c" +#line 4716 "grammar.c" break; - case 152: /* primary_expression: identifier */ -#line 2510 "grammar.y" + case 154: /* primary_expression: identifier */ +#line 2562 "grammar.y" { int result = ERROR_SUCCESS; @@ -4412,11 +4491,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4416 "grammar.c" +#line 4763 "grammar.c" break; - case 153: /* primary_expression: '-' primary_expression */ -#line 2553 "grammar.y" + case 155: /* primary_expression: '-' primary_expression */ +#line 2605 "grammar.y" { int result = ERROR_SUCCESS; @@ -4437,11 +4516,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4441 "grammar.c" +#line 4788 "grammar.c" break; - case 154: /* primary_expression: primary_expression '+' primary_expression */ -#line 2574 "grammar.y" + case 156: /* primary_expression: primary_expression '+' primary_expression */ +#line 2626 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "+", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ 
-4476,11 +4555,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4480 "grammar.c" +#line 4827 "grammar.c" break; - case 155: /* primary_expression: primary_expression '-' primary_expression */ -#line 2609 "grammar.y" + case 157: /* primary_expression: primary_expression '-' primary_expression */ +#line 2661 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "-", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4515,11 +4594,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4519 "grammar.c" +#line 4866 "grammar.c" break; - case 156: /* primary_expression: primary_expression '*' primary_expression */ -#line 2644 "grammar.y" + case 158: /* primary_expression: primary_expression '*' primary_expression */ +#line 2696 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "*", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4553,11 +4632,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4557 "grammar.c" +#line 4904 "grammar.c" break; - case 157: /* primary_expression: primary_expression '\\' primary_expression */ -#line 2678 "grammar.y" + case 159: /* primary_expression: primary_expression '\\' primary_expression */ +#line 2730 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "\\", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4582,11 +4661,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4586 "grammar.c" +#line 4933 "grammar.c" break; - case 158: /* primary_expression: primary_expression '%' primary_expression */ -#line 2703 "grammar.y" + case 160: /* primary_expression: primary_expression '%' primary_expression */ +#line 2755 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "%"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -4603,11 +4682,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); 
fail_if_error(ERROR_DIVISION_BY_ZERO); } } -#line 4607 "grammar.c" +#line 4954 "grammar.c" break; - case 159: /* primary_expression: primary_expression '^' primary_expression */ -#line 2720 "grammar.y" + case 161: /* primary_expression: primary_expression '^' primary_expression */ +#line 2772 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -4617,11 +4696,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(^, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 4621 "grammar.c" +#line 4968 "grammar.c" break; - case 160: /* primary_expression: primary_expression '&' primary_expression */ -#line 2730 "grammar.y" + case 162: /* primary_expression: primary_expression '&' primary_expression */ +#line 2782 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -4631,11 +4710,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(&, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 4635 "grammar.c" +#line 4982 "grammar.c" break; - case 161: /* primary_expression: primary_expression '|' primary_expression */ -#line 2740 "grammar.y" + case 163: /* primary_expression: primary_expression '|' primary_expression */ +#line 2792 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "|"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "|"); @@ -4645,11 +4724,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(|, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 4649 
"grammar.c" +#line 4996 "grammar.c" break; - case 162: /* primary_expression: '~' primary_expression */ -#line 2750 "grammar.y" + case 164: /* primary_expression: '~' primary_expression */ +#line 2802 "grammar.y" { check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "~"); @@ -4659,11 +4738,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).value.integer = ((yyvsp[0].expression).value.integer == YR_UNDEFINED) ? YR_UNDEFINED : ~((yyvsp[0].expression).value.integer); } -#line 4663 "grammar.c" +#line 5010 "grammar.c" break; - case 163: /* primary_expression: primary_expression "<<" primary_expression */ -#line 2760 "grammar.y" + case 165: /* primary_expression: primary_expression "<<" primary_expression */ +#line 2812 "grammar.y" { int result; @@ -4683,11 +4762,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4687 "grammar.c" +#line 5034 "grammar.c" break; - case 164: /* primary_expression: primary_expression ">>" primary_expression */ -#line 2780 "grammar.y" + case 166: /* primary_expression: primary_expression ">>" primary_expression */ +#line 2832 "grammar.y" { int result; @@ -4707,19 +4786,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4711 "grammar.c" +#line 5058 "grammar.c" break; - case 165: /* primary_expression: regexp */ -#line 2800 "grammar.y" + case 167: /* primary_expression: regexp */ +#line 2852 "grammar.y" { (yyval.expression) = (yyvsp[0].expression); } -#line 4719 "grammar.c" +#line 5066 "grammar.c" break; -#line 4723 "grammar.c" +#line 5070 "grammar.c" default: break; } @@ -4912,5 +4991,5 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); return yyresult; } -#line 2805 "grammar.y" +#line 2857 "grammar.y" diff --git a/libyara/grammar.y b/libyara/grammar.y index dc1384d35e..a8a5c824a1 100644 --- a/libyara/grammar.y +++ b/libyara/grammar.y @@ -2178,6 +2178,25 @@ range result = ERROR_WRONG_TYPE; } + // If we can statically determine lower 
and upper bounds, ensure + // lower < upper. Check for upper bound here because some things (like + // string count) are EXPRESSION_TYPE_INTEGER. + if ($2.value.integer != YR_UNDEFINED && $4.value.integer != YR_UNDEFINED) + { + if ($2.value.integer > $4.value.integer) + { + yr_compiler_set_error_extra_info( + compiler, "range lower bound must be less than upper bound"); + result = ERROR_INVALID_VALUE; + } + else if ($2.value.integer < 0) + { + yr_compiler_set_error_extra_info( + compiler, "range lower bound can not be negative"); + result = ERROR_INVALID_VALUE; + } + } + fail_if_error(result); } ; diff --git a/tests/test-rules.c b/tests/test-rules.c index ffb9972586..f819969311 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -631,6 +631,28 @@ static void test_strings() }", "foobarbaz" TEXT_1024_BYTES); + // https://github.com/VirusTotal/yara/issues/1695 + assert_false_rule( + "rule test {\n\ + strings:\n\ + $a = \"AXS\"\n\ + $b = \"ERS\"\n\ + condition:\n\ + none of them in (0..10)\n\ + }", + "AXSERS" TEXT_1024_BYTES); + + // https://github.com/VirusTotal/yara/issues/1757 + assert_false_rule( + "rule test {\n\ + strings:\n\ + $a = \"foo\"\n\ + $b = \"foo\"\n\ + condition:\n\ + none of them in (0..1)\n\ + }", + "foo"); + // https://github.com/VirusTotal/yara/issues/1660 assert_false_rule( "rule test {\n\ From 11138994d973427b5315c982e6f818837226e1a8 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 8 Aug 2022 12:37:08 +0200 Subject: [PATCH 26/28] Prevent undefined values in offset ranges from corrupting the VM's stack. 
--- libyara/exec.c | 17 ++++++++++++---- tests/test-rules.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/libyara/exec.c b/libyara/exec.c index e0865e5a33..40527dae7a 100644 --- a/libyara/exec.c +++ b/libyara/exec.c @@ -1433,11 +1433,20 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r2); // Offset range end pop(r1); // Offset range start + pop(r3); // First string - ensure_defined(r1); - ensure_defined(r2); - - pop(r3); + // If any of the range boundaries are undefined the result is also + // undefined, but we need to unwind the stack first. + if (is_undef(r1) || is_undef(r2)) + { + // Remove all the strings. + while (!is_undef(r3)) pop(r3); + // Remove the quantifier at the bottom of the stack. + pop(r3); + r1.i = YR_UNDEFINED; + push(r1); + break; + } while (!is_undef(r3)) { diff --git a/tests/test-rules.c b/tests/test-rules.c index f819969311..b37dd3f00b 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -1746,6 +1746,38 @@ static void test_of() }", "mississippi"); + // If one of the bounds can not be determined statically it isn't an error. + assert_true_rule( + "rule test { \ + strings: \ + $a = \"AXSERS\" \ + condition: \ + true or any of them in (0..filesize-100) \ + }", + TEXT_1024_BYTES); + + // Lower bound can not be negative, if it can be determined statically. + assert_error( + "rule test { \ + strings: \ + $a = \"AXSERS\" \ + condition: \ + $a in (-1..10) \ + }", + ERROR_INVALID_VALUE); + + // Make sure that an undefined range boundary returns an undefined value, + // which translates to false. + assert_false_rule( + "import \"tests\" \ + rule test { \ + strings: \ + $a = \"missi\" \ + condition: \ + any of them in (0..tests.undefined.i) \ + }", + "mississippi"); + YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); } @@ -1974,6 +2006,25 @@ void test_for() }", NULL); + // Lower bound must be less than upper bound, if it can be determined + // statically.
+ assert_error( + "rule test { \ + condition: \ + for any i in (10..1): (i) \ + }", + ERROR_INVALID_VALUE); + + // Test case for https://github.com/VirusTotal/yara/issues/1729 + assert_true_rule( + "rule test { \ + strings: \ + $a = \"abcde\" \ + condition: \ + for any n in (1..10) : ( n of ($a*) ) \ + }", + "abcde"); + YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); } From cc02b4fe857758f748e4183c2b2bdc8f7ad80579 Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 8 Aug 2022 12:53:02 +0200 Subject: [PATCH 27/28] Fix issues while merging cherry-picked commits from master. --- libyara/exec.c | 45 ++++- libyara/grammar.c | 355 ++++++++++++++--------------------- libyara/grammar.y | 19 -- libyara/include/yara/types.h | 12 +- tests/test-rules.c | 19 -- 5 files changed, 185 insertions(+), 265 deletions(-) diff --git a/libyara/exec.c b/libyara/exec.c index 40527dae7a..2cbd85599d 100644 --- a/libyara/exec.c +++ b/libyara/exec.c @@ -326,6 +326,22 @@ static int iter_int_enum_next(YR_ITERATOR* self, YR_VALUE_STACK* stack) return ERROR_SUCCESS; } +// Global table that contains the "next" function for different types of +// iterators. The reason for using this table is to avoid storing pointers +// in the YARA's VM stack. Instead of the pointers we store an index within +// this table. 
+static YR_ITERATOR_NEXT_FUNC iter_next_func_table[] = { + iter_array_next, + iter_dict_next, + iter_int_range_next, + iter_int_enum_next, +}; + +#define ITER_NEXT_ARRAY 0 +#define ITER_NEXT_DICT 1 +#define ITER_NEXT_INT_RANGE 2 +#define ITER_NEXT_INT_ENUM 3 + int yr_execute_code(YR_SCAN_CONTEXT* context) { YR_DEBUG_FPRINTF(2, stderr, "+ %s() {\n", __FUNCTION__); @@ -428,7 +444,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r1); r2.it->array_it.array = r1.o; r2.it->array_it.index = 0; - r2.it->next = iter_array_next; + r2.it->next_func_idx = ITER_NEXT_ARRAY; push(r2); } @@ -449,7 +465,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r1); r2.it->dict_it.dict = r1.o; r2.it->dict_it.index = 0; - r2.it->next = iter_dict_next; + r2.it->next_func_idx = ITER_NEXT_DICT; push(r2); } @@ -473,7 +489,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r1); r3.it->int_range_it.next = r1.i; r3.it->int_range_it.last = r2.i; - r3.it->next = iter_int_range_next; + r3.it->next_func_idx = ITER_NEXT_INT_RANGE; push(r3); } @@ -499,7 +515,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) { r3.it->int_enum_it.count = r1.i; r3.it->int_enum_it.next = 0; - r3.it->next = iter_int_enum_next; + r3.it->next_func_idx = ITER_NEXT_INT_ENUM; for (int64_t i = r1.i; i > 0; i--) { @@ -519,11 +535,22 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) // Loads the iterator in r1, but leaves the iterator in the stack. pop(r1); push(r1); - // The iterator's next function is responsible for pushing the next - // item in the stack, and a boolean indicating if there are more items - // to retrieve. The boolean will be at the top of the stack after - // calling "next". - result = r1.it->next(r1.it, &stack); + + if (r1.it->next_func_idx < + sizeof(iter_next_func_table) / sizeof(YR_ITERATOR_NEXT_FUNC)) + { + // The iterator's next function is responsible for pushing the next + // item in the stack, and a boolean indicating if there are more items + // to retrieve. 
The boolean will be at the top of the stack after + // calling "next". + result = iter_next_func_table[r1.it->next_func_idx](r1.it, &stack); + } + else + { + // next_func_idx is outside the valid range, this should not happen. + result = ERROR_INTERNAL_FATAL_ERROR; + } + stop = (result != ERROR_SUCCESS); break; diff --git a/libyara/grammar.c b/libyara/grammar.c index deed72c4dd..5ff57ff7d0 100644 --- a/libyara/grammar.c +++ b/libyara/grammar.c @@ -921,23 +921,23 @@ static const yytype_int8 yytranslate[] = /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const yytype_int16 yyrline[] = { - 0, 360, 360, 361, 362, 363, 364, 365, 366, 374, - 387, 392, 386, 419, 422, 438, 441, 456, 461, 462, - 467, 468, 474, 477, 493, 502, 544, 545, 550, 567, - 581, 595, 609, 627, 628, 634, 633, 650, 649, 670, - 669, 694, 700, 760, 761, 762, 763, 764, 765, 771, - 792, 823, 828, 845, 850, 870, 871, 885, 886, 887, - 888, 889, 893, 894, 908, 912, 1007, 1055, 1116, 1161, - 1162, 1166, 1201, 1254, 1296, 1319, 1325, 1331, 1343, 1353, - 1363, 1373, 1383, 1393, 1403, 1413, 1427, 1442, 1453, 1528, - 1566, 1470, 1694, 1705, 1716, 1735, 1754, 1766, 1772, 1778, - 1777, 1823, 1822, 1866, 1873, 1880, 1887, 1894, 1901, 1908, - 1912, 1920, 1921, 1946, 1966, 1994, 2068, 2096, 2104, 2113, - 2156, 2171, 2190, 2200, 2199, 2208, 2222, 2223, 2228, 2238, - 2253, 2252, 2265, 2266, 2271, 2304, 2329, 2377, 2384, 2390, - 2396, 2406, 2410, 2418, 2430, 2444, 2451, 2458, 2483, 2495, - 2507, 2519, 2534, 2546, 2561, 2604, 2625, 2660, 2695, 2729, - 2754, 2771, 2781, 2791, 2801, 2811, 2831, 2851 + 0, 326, 326, 327, 328, 329, 330, 331, 332, 340, + 353, 358, 352, 385, 388, 404, 407, 422, 427, 428, + 433, 434, 440, 443, 459, 468, 510, 511, 516, 533, + 547, 561, 575, 593, 594, 600, 599, 616, 615, 636, + 635, 660, 666, 726, 727, 728, 729, 730, 731, 737, + 758, 789, 794, 811, 816, 836, 837, 851, 852, 853, + 854, 855, 859, 860, 874, 878, 973, 1021, 1082, 1129, + 1130, 1134, 1169, 1222, 1264, 1287,
1293, 1299, 1311, 1321, + 1331, 1341, 1351, 1361, 1371, 1381, 1395, 1410, 1421, 1498, + 1536, 1438, 1695, 1694, 1784, 1790, 1796, 1816, 1836, 1842, + 1848, 1854, 1853, 1899, 1898, 1942, 1949, 1956, 1963, 1970, + 1977, 1984, 1988, 1996, 2016, 2044, 2118, 2146, 2154, 2163, + 2187, 2202, 2222, 2221, 2227, 2238, 2239, 2244, 2251, 2263, + 2262, 2272, 2273, 2278, 2309, 2331, 2335, 2340, 2345, 2354, + 2358, 2366, 2378, 2392, 2399, 2406, 2431, 2443, 2455, 2467, + 2482, 2494, 2509, 2552, 2573, 2608, 2643, 2677, 2702, 2719, + 2729, 2739, 2749, 2759, 2779, 2799 }; #endif @@ -3975,32 +3975,13 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); result = ERROR_WRONG_TYPE; } - // If we can statically determine lower and upper bounds, ensure - // lower < upper. Check for upper bound here because some things (like - // string count) are EXPRESSION_TYPE_INTEGER. - if ((yyvsp[-3].expression).value.integer != YR_UNDEFINED && (yyvsp[-1].expression).value.integer != YR_UNDEFINED) - { - if ((yyvsp[-3].expression).value.integer > (yyvsp[-1].expression).value.integer) - { - yr_compiler_set_error_extra_info( - compiler, "range lower bound must be less than upper bound"); - result = ERROR_INVALID_VALUE; - } - else if ((yyvsp[-3].expression).value.integer < 0) - { - yr_compiler_set_error_extra_info( - compiler, "range lower bound can not be negative"); - result = ERROR_INVALID_VALUE; - } - } - fail_if_error(result); } -#line 4209 "grammar.c" +#line 3981 "grammar.c" break; case 120: /* integer_enumeration: primary_expression */ -#line 2157 "grammar.y" +#line 2188 "grammar.y" { int result = ERROR_SUCCESS; @@ -4015,11 +3996,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.integer) = 1; } -#line 4228 "grammar.c" +#line 4000 "grammar.c" break; case 121: /* integer_enumeration: integer_enumeration ',' primary_expression */ -#line 2172 "grammar.y" +#line 2203 "grammar.y" { int result = ERROR_SUCCESS; @@ -4034,112 +4015,62 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); 
(yyval.integer) = (yyvsp[-2].integer) + 1; } -#line 4247 "grammar.c" +#line 4019 "grammar.c" break; - case 122: /* string_iterator: string_set */ -#line 2191 "grammar.y" - { - fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); - fail_if_error(yr_parser_emit(yyscanner, OP_ITER_START_STRING_SET, - NULL)); - } -#line 4257 "grammar.c" - break; - - case 123: /* $@10: %empty */ -#line 2200 "grammar.y" + case 122: /* $@11: %empty */ +#line 2222 "grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4266 "grammar.c" +#line 4028 "grammar.c" break; - case 124: /* string_set: '(' $@10 string_enumeration ')' */ -#line 2205 "grammar.y" - { - (yyval.integer) = (yyvsp[-1].integer); - } -#line 4274 "grammar.c" - break; - - case 125: /* string_set: "" */ -#line 2209 "grammar.y" + case 124: /* string_set: "" */ +#line 2228 "grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, YR_UNDEFINED)); fail_if_error(yr_parser_emit_pushes_for_strings( yyscanner, "$*")); } -#line 4288 "grammar.c" +#line 4039 "grammar.c" break; - case 126: /* string_enumeration: string_enumeration_item */ -#line 2222 "grammar.y" - { (yyval.integer) = (yyvsp[0].integer); } -#line 4294 "grammar.c" - break; - - case 127: /* string_enumeration: string_enumeration ',' string_enumeration_item */ -#line 2223 "grammar.y" - { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } -#line 4300 "grammar.c" - break; - - case 128: /* string_enumeration_item: "string identifier" */ -#line 2229 "grammar.y" + case 127: /* string_enumeration_item: "string identifier" */ +#line 2245 "grammar.y" { int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string)); yr_free((yyvsp[0].c_string)); fail_if_error(result); } -#line 4314 "grammar.c" +#line 4050 "grammar.c" break; - case 129: /* string_enumeration_item: "string identifier with wildcard" */ -#line 2239 "grammar.y" + case 128: /* string_enumeration_item: "string identifier with 
wildcard" */ +#line 2252 "grammar.y" { int result = yr_parser_emit_pushes_for_strings(yyscanner, (yyvsp[0].c_string)); yr_free((yyvsp[0].c_string)); fail_if_error(result); } -#line 4328 "grammar.c" +#line 4061 "grammar.c" break; - case 130: /* $@11: %empty */ -#line 2253 "grammar.y" + case 129: /* $@12: %empty */ +#line 2263 "grammar.y" { // Push end-of-list marker yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); } -#line 4337 "grammar.c" - break; - - case 131: /* rule_set: '(' $@11 rule_enumeration ')' */ -#line 2258 "grammar.y" - { - (yyval.integer) = (yyvsp[-1].integer); - } -#line 4345 "grammar.c" - break; - - case 132: /* rule_enumeration: rule_enumeration_item */ -#line 2265 "grammar.y" - { (yyval.integer) = (yyvsp[0].integer); } -#line 4351 "grammar.c" +#line 4070 "grammar.c" break; - case 133: /* rule_enumeration: rule_enumeration ',' rule_enumeration_item */ -#line 2266 "grammar.y" - { (yyval.integer) = (yyvsp[-2].integer) + (yyvsp[0].integer); } -#line 4357 "grammar.c" - break; - - case 134: /* rule_enumeration_item: "identifier" */ -#line 2272 "grammar.y" + case 133: /* rule_enumeration_item: "identifier" */ +#line 2279 "grammar.y" { int result = ERROR_SUCCESS; @@ -4170,11 +4101,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4394 "grammar.c" +#line 4105 "grammar.c" break; - case 135: /* rule_enumeration_item: "identifier" '*' */ -#line 2305 "grammar.y" + case 134: /* rule_enumeration_item: "identifier" '*' */ +#line 2310 "grammar.y" { YR_NAMESPACE* ns = (YR_NAMESPACE*) yr_arena_get_ptr( compiler->arena, @@ -4192,64 +4123,54 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4419 "grammar.c" +#line 4127 "grammar.c" break; - case 136: /* for_expression: primary_expression */ -#line 2330 "grammar.y" + case 135: /* for_expression: primary_expression */ +#line 2332 "grammar.y" { (yyval.integer) = FOR_EXPRESSION_ANY; } -#line 4471 "grammar.c" +#line 4135 "grammar.c" break; - case 
137: /* for_expression: for_quantifier */ -#line 2378 "grammar.y" + case 136: /* for_expression: "" */ +#line 2336 "grammar.y" { yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); (yyval.integer) = FOR_EXPRESSION_ALL; } -#line 4479 "grammar.c" - break; - - case 138: /* for_quantifier: "" */ -#line 2385 "grammar.y" - { - yr_parser_emit_push_const(yyscanner, YR_UNDEFINED); - (yyval.expression).type = EXPRESSION_TYPE_QUANTIFIER; - (yyval.expression).value.integer = FOR_EXPRESSION_ALL; - } -#line 4489 "grammar.c" +#line 4144 "grammar.c" break; - case 139: /* for_quantifier: "" */ -#line 2391 "grammar.y" + case 137: /* for_expression: "" */ +#line 2341 "grammar.y" { yr_parser_emit_push_const(yyscanner, 1); (yyval.integer) = FOR_EXPRESSION_ANY; } -#line 4499 "grammar.c" +#line 4153 "grammar.c" break; - case 140: /* for_quantifier: "" */ -#line 2397 "grammar.y" + case 138: /* for_expression: "" */ +#line 2346 "grammar.y" { yr_parser_emit_push_const(yyscanner, 0); (yyval.integer) = FOR_EXPRESSION_NONE; } -#line 4509 "grammar.c" +#line 4162 "grammar.c" break; - case 141: /* primary_expression: '(' primary_expression ')' */ -#line 2407 "grammar.y" + case 139: /* primary_expression: '(' primary_expression ')' */ +#line 2355 "grammar.y" { (yyval.expression) = (yyvsp[-1].expression); } -#line 4517 "grammar.c" +#line 4170 "grammar.c" break; - case 142: /* primary_expression: "" */ -#line 2411 "grammar.y" + case 140: /* primary_expression: "" */ +#line 2359 "grammar.y" { fail_if_error(yr_parser_emit( yyscanner, OP_FILESIZE, NULL)); @@ -4257,11 +4178,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4529 "grammar.c" +#line 4182 "grammar.c" break; - case 143: /* primary_expression: "" */ -#line 2419 "grammar.y" + case 141: /* primary_expression: "" */ +#line 2367 "grammar.y" { yywarning(yyscanner, "Using deprecated \"entrypoint\" keyword. 
Use the \"entry_point\" " @@ -4273,11 +4194,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4545 "grammar.c" +#line 4198 "grammar.c" break; - case 144: /* primary_expression: "integer function" '(' primary_expression ')' */ -#line 2431 "grammar.y" + case 142: /* primary_expression: "integer function" '(' primary_expression ')' */ +#line 2379 "grammar.y" { check_type((yyvsp[-1].expression), EXPRESSION_TYPE_INTEGER, "intXXXX or uintXXXX"); @@ -4291,33 +4212,33 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4563 "grammar.c" +#line 4216 "grammar.c" break; - case 145: /* primary_expression: "integer number" */ -#line 2445 "grammar.y" + case 143: /* primary_expression: "integer number" */ +#line 2393 "grammar.y" { fail_if_error(yr_parser_emit_push_const(yyscanner, (yyvsp[0].integer))); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = (yyvsp[0].integer); } -#line 4574 "grammar.c" +#line 4227 "grammar.c" break; - case 146: /* primary_expression: "floating point number" */ -#line 2452 "grammar.y" + case 144: /* primary_expression: "floating point number" */ +#line 2400 "grammar.y" { fail_if_error(yr_parser_emit_with_arg_double( yyscanner, OP_PUSH, (yyvsp[0].double_), NULL, NULL)); (yyval.expression).type = EXPRESSION_TYPE_FLOAT; } -#line 4585 "grammar.c" +#line 4238 "grammar.c" break; - case 147: /* primary_expression: "text string" */ -#line 2459 "grammar.y" + case 145: /* primary_expression: "text string" */ +#line 2407 "grammar.y" { YR_ARENA_REF ref; @@ -4342,11 +4263,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_STRING; (yyval.expression).value.sized_string_ref = ref; } -#line 4614 "grammar.c" +#line 4267 "grammar.c" break; - case 148: /* 
primary_expression: "string count" "" range */ -#line 2484 "grammar.y" + case 146: /* primary_expression: "string count" "" range */ +#line 2432 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-2].c_string), OP_COUNT_IN, YR_UNDEFINED); @@ -4358,11 +4279,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4630 "grammar.c" +#line 4283 "grammar.c" break; - case 149: /* primary_expression: "string count" */ -#line 2496 "grammar.y" + case 147: /* primary_expression: "string count" */ +#line 2444 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[0].c_string), OP_COUNT, YR_UNDEFINED); @@ -4374,11 +4295,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4646 "grammar.c" +#line 4299 "grammar.c" break; - case 150: /* primary_expression: "string offset" '[' primary_expression ']' */ -#line 2508 "grammar.y" + case 148: /* primary_expression: "string offset" '[' primary_expression ']' */ +#line 2456 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_OFFSET, YR_UNDEFINED); @@ -4390,11 +4311,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4662 "grammar.c" +#line 4315 "grammar.c" break; - case 151: /* primary_expression: "string offset" */ -#line 2520 "grammar.y" + case 149: /* primary_expression: "string offset" */ +#line 2468 "grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4409,11 +4330,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4681 "grammar.c" +#line 4334 "grammar.c" break; - case 
152: /* primary_expression: "string length" '[' primary_expression ']' */ -#line 2535 "grammar.y" + case 150: /* primary_expression: "string length" '[' primary_expression ']' */ +#line 2483 "grammar.y" { int result = yr_parser_reduce_string_identifier( yyscanner, (yyvsp[-3].c_string), OP_LENGTH, YR_UNDEFINED); @@ -4425,11 +4346,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4697 "grammar.c" +#line 4350 "grammar.c" break; - case 153: /* primary_expression: "string length" */ -#line 2547 "grammar.y" + case 151: /* primary_expression: "string length" */ +#line 2495 "grammar.y" { int result = yr_parser_emit_push_const(yyscanner, 1); @@ -4444,11 +4365,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = YR_UNDEFINED; } -#line 4716 "grammar.c" +#line 4369 "grammar.c" break; - case 154: /* primary_expression: identifier */ -#line 2562 "grammar.y" + case 152: /* primary_expression: identifier */ +#line 2510 "grammar.y" { int result = ERROR_SUCCESS; @@ -4491,11 +4412,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4763 "grammar.c" +#line 4416 "grammar.c" break; - case 155: /* primary_expression: '-' primary_expression */ -#line 2605 "grammar.y" + case 153: /* primary_expression: '-' primary_expression */ +#line 2553 "grammar.y" { int result = ERROR_SUCCESS; @@ -4516,11 +4437,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4788 "grammar.c" +#line 4441 "grammar.c" break; - case 156: /* primary_expression: primary_expression '+' primary_expression */ -#line 2626 "grammar.y" + case 154: /* primary_expression: primary_expression '+' primary_expression */ +#line 2574 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "+", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ 
-4555,11 +4476,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4827 "grammar.c" +#line 4480 "grammar.c" break; - case 157: /* primary_expression: primary_expression '-' primary_expression */ -#line 2661 "grammar.y" + case 155: /* primary_expression: primary_expression '-' primary_expression */ +#line 2609 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "-", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4594,11 +4515,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4866 "grammar.c" +#line 4519 "grammar.c" break; - case 158: /* primary_expression: primary_expression '*' primary_expression */ -#line 2696 "grammar.y" + case 156: /* primary_expression: primary_expression '*' primary_expression */ +#line 2644 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "*", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4632,11 +4553,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4904 "grammar.c" +#line 4557 "grammar.c" break; - case 159: /* primary_expression: primary_expression '\\' primary_expression */ -#line 2730 "grammar.y" + case 157: /* primary_expression: primary_expression '\\' primary_expression */ +#line 2678 "grammar.y" { int result = yr_parser_reduce_operation( yyscanner, "\\", (yyvsp[-2].expression), (yyvsp[0].expression)); @@ -4661,11 +4582,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 4933 "grammar.c" +#line 4586 "grammar.c" break; - case 160: /* primary_expression: primary_expression '%' primary_expression */ -#line 2755 "grammar.y" + case 158: /* primary_expression: primary_expression '%' primary_expression */ +#line 2703 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "%"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "%"); @@ -4682,11 +4603,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); 
fail_if_error(ERROR_DIVISION_BY_ZERO); } } -#line 4954 "grammar.c" +#line 4607 "grammar.c" break; - case 161: /* primary_expression: primary_expression '^' primary_expression */ -#line 2772 "grammar.y" + case 159: /* primary_expression: primary_expression '^' primary_expression */ +#line 2720 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -4696,11 +4617,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(^, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 4968 "grammar.c" +#line 4621 "grammar.c" break; - case 162: /* primary_expression: primary_expression '&' primary_expression */ -#line 2782 "grammar.y" + case 160: /* primary_expression: primary_expression '&' primary_expression */ +#line 2730 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "^"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "^"); @@ -4710,11 +4631,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(&, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 4982 "grammar.c" +#line 4635 "grammar.c" break; - case 163: /* primary_expression: primary_expression '|' primary_expression */ -#line 2792 "grammar.y" + case 161: /* primary_expression: primary_expression '|' primary_expression */ +#line 2740 "grammar.y" { check_type((yyvsp[-2].expression), EXPRESSION_TYPE_INTEGER, "|"); check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "|"); @@ -4724,11 +4645,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).type = EXPRESSION_TYPE_INTEGER; (yyval.expression).value.integer = OPERATION(|, (yyvsp[-2].expression).value.integer, (yyvsp[0].expression).value.integer); } -#line 4996 
"grammar.c" +#line 4649 "grammar.c" break; - case 164: /* primary_expression: '~' primary_expression */ -#line 2802 "grammar.y" + case 162: /* primary_expression: '~' primary_expression */ +#line 2750 "grammar.y" { check_type((yyvsp[0].expression), EXPRESSION_TYPE_INTEGER, "~"); @@ -4738,11 +4659,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); (yyval.expression).value.integer = ((yyvsp[0].expression).value.integer == YR_UNDEFINED) ? YR_UNDEFINED : ~((yyvsp[0].expression).value.integer); } -#line 5010 "grammar.c" +#line 4663 "grammar.c" break; - case 165: /* primary_expression: primary_expression "<<" primary_expression */ -#line 2812 "grammar.y" + case 163: /* primary_expression: primary_expression "<<" primary_expression */ +#line 2760 "grammar.y" { int result; @@ -4762,11 +4683,11 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5034 "grammar.c" +#line 4687 "grammar.c" break; - case 166: /* primary_expression: primary_expression ">>" primary_expression */ -#line 2832 "grammar.y" + case 164: /* primary_expression: primary_expression ">>" primary_expression */ +#line 2780 "grammar.y" { int result; @@ -4786,19 +4707,19 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); fail_if_error(result); } -#line 5058 "grammar.c" +#line 4711 "grammar.c" break; - case 167: /* primary_expression: regexp */ -#line 2852 "grammar.y" + case 165: /* primary_expression: regexp */ +#line 2800 "grammar.y" { (yyval.expression) = (yyvsp[0].expression); } -#line 5066 "grammar.c" +#line 4719 "grammar.c" break; -#line 5070 "grammar.c" +#line 4723 "grammar.c" default: break; } @@ -4991,5 +4912,5 @@ YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); return yyresult; } -#line 2857 "grammar.y" +#line 2805 "grammar.y" diff --git a/libyara/grammar.y b/libyara/grammar.y index a8a5c824a1..dc1384d35e 100644 --- a/libyara/grammar.y +++ b/libyara/grammar.y @@ -2178,25 +2178,6 @@ range result = ERROR_WRONG_TYPE; } - // If we can statically determine lower 
and upper bounds, ensure - // lower < upper. Check for upper bound here because some things (like - // string count) are EXPRESSION_TYPE_INTEGER. - if ($2.value.integer != YR_UNDEFINED && $4.value.integer != YR_UNDEFINED) - { - if ($2.value.integer > $4.value.integer) - { - yr_compiler_set_error_extra_info( - compiler, "range lower bound must be less than upper bound"); - result = ERROR_INVALID_VALUE; - } - else if ($2.value.integer < 0) - { - yr_compiler_set_error_extra_info( - compiler, "range lower bound can not be negative"); - result = ERROR_INVALID_VALUE; - } - } - fail_if_error(result); } ; diff --git a/libyara/include/yara/types.h b/libyara/include/yara/types.h index f1d63c6c63..e823068f1f 100644 --- a/libyara/include/yara/types.h +++ b/libyara/include/yara/types.h @@ -397,12 +397,21 @@ struct RE_AST #pragma warning(disable : 4200) #endif +// The RE structure is embedded in the YARA's VM instruction flow, which +// means that its alignment is not guaranteed. For this reason the it must +// be a "packed" structure, in order to prevent alignment issues in platforms +// with strict alignment constraints. +#pragma pack(push) +#pragma pack(1) + struct RE { uint32_t flags; uint8_t code[0]; }; +#pragma pack(pop) + #ifdef _MSC_VER #pragma warning(pop) #endif @@ -979,7 +988,8 @@ struct YR_INT_ENUM_ITERATOR struct YR_ITERATOR { - YR_ITERATOR_NEXT_FUNC next; + // Index of the next function within the iter_next_func_table global array. + int next_func_idx; union { diff --git a/tests/test-rules.c b/tests/test-rules.c index b37dd3f00b..1fd363b710 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -1756,16 +1756,6 @@ static void test_of() }", TEXT_1024_BYTES); - // Lower bound can not be negative, if it can be determined statically. 
- assert_error( - "rule test { \ - strings: \ - $a = \"AXSERS\" \ - condition: \ - $a in (-1..10) \ - }", - ERROR_INVALID_VALUE); - // Make sure that an undefined range boundary returns an undefined value, // which translates to false. assert_false_rule( @@ -2006,15 +1996,6 @@ void test_for() }", NULL); - // Lower bound must be less than upper bound, if it can be determined - // statically. - assert_error( - "rule test { \ - condition: \ - for any i in (10..1): (i) \ - }", - ERROR_INVALID_VALUE); - // Test case for https://github.com/VirusTotal/yara/issues/1729 assert_true_rule( "rule test { \ From ba94b4f8ebb6d56786d14f6a0f7529b32d7c216f Mon Sep 17 00:00:00 2001 From: "Victor M. Alvarez" Date: Mon, 8 Aug 2022 13:29:28 +0200 Subject: [PATCH 28/28] Bump version number. --- configure.ac | 2 +- libyara/include/yara/libyara.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/configure.ac b/configure.ac index 64356b4531..282e7845e6 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([yara], [4.2.2], [vmalvarez@virustotal.com]) +AC_INIT([yara], [4.2.3], [vmalvarez@virustotal.com]) AM_SILENT_RULES([yes]) AC_CONFIG_SRCDIR([cli/yara.c]) diff --git a/libyara/include/yara/libyara.h b/libyara/include/yara/libyara.h index c87fc5b15d..323282c102 100644 --- a/libyara/include/yara/libyara.h +++ b/libyara/include/yara/libyara.h @@ -34,7 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define YR_MAJOR_VERSION 4 #define YR_MINOR_VERSION 2 -#define YR_MICRO_VERSION 2 +#define YR_MICRO_VERSION 3 #define version_str(s) _version_str(s) #define _version_str(s) #s