diff --git a/Makefile.am b/Makefile.am index 4ebf01d15e..ca1d615165 100644 --- a/Makefile.am +++ b/Makefile.am @@ -138,6 +138,12 @@ test_dotnet_SOURCES = tests/test-dotnet.c tests/util.c test_dotnet_LDADD = libyara/.libs/libyara.a endif +if MAGIC_MODULE +check_PROGRAMS+=test-magic +test_magic_SOURCES = tests/test-magic.c tests/util.c +test_magic_LDADD = libyara/.libs/libyara.a +endif + if PB_TESTS_MODULE check_PROGRAMS+=test-pb test_pb_SOURCES = tests/test-pb.c tests/util.c diff --git a/bazel/yara.bzl b/bazel/yara.bzl index 51c0729b1e..09e1512069 100644 --- a/bazel/yara.bzl +++ b/bazel/yara.bzl @@ -61,6 +61,7 @@ module_list = rule( def yara_library( name, defines = [], + includes = [], modules = [], modules_srcs = [], deps = [], @@ -195,7 +196,7 @@ def yara_library( "libyara/include/yara/rules.h", ], copts = copts, - includes = [ + includes = includes + [ "libyara/modules", "libyara/include", "libyara", diff --git a/bazel/yara_deps.bzl b/bazel/yara_deps.bzl index 44b18ed21f..0d6e90d1fb 100644 --- a/bazel/yara_deps.bzl +++ b/bazel/yara_deps.bzl @@ -36,9 +36,9 @@ def yara_deps(): maybe( http_archive, name = "openssl", - url = "https://github.com/openssl/openssl/archive/OpenSSL_1_1_0h.tar.gz", - sha256 = "f56dd7d81ce8d3e395f83285bd700a1098ed5a4cb0a81ce9522e41e6db7e0389", - strip_prefix = "openssl-OpenSSL_1_1_0h", + url = "https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1n.tar.gz", + sha256 = "6b2d2440ced8c802aaa61475919f0870ec556694c466ebea460e35ea2b14839e", + strip_prefix = "openssl-OpenSSL_1_1_1n", build_file = "@com_github_virustotal_yara//:bazel/openssl.BUILD", ) maybe( diff --git a/cli/args.c b/cli/args.c index f6e870af5f..939f3e8cd3 100644 --- a/cli/args.c +++ b/cli/args.c @@ -96,7 +96,7 @@ args_error_type_t args_parse_option( *(bool*) opt->value = !(*(bool*) opt->value); break; - case ARGS_OPT_INTEGER: + case ARGS_OPT_LONG: if (opt_arg == NULL) return ARGS_ERROR_REQUIRED_INTEGER_ARG; @@ -110,6 +110,20 @@ args_error_type_t args_parse_option( 
break; + case ARGS_OPT_LONG_LONG: + if (opt_arg == NULL) + return ARGS_ERROR_REQUIRED_INTEGER_ARG; + + *(long long*) opt->value = _tcstoll(opt_arg, &endptr, 0); + + if (*endptr != '\0') + return ARGS_ERROR_REQUIRED_INTEGER_ARG; + + if (opt_arg_was_used != NULL) + *opt_arg_was_used = 1; + + break; + case ARGS_OPT_STRING: if (opt_arg == NULL) return ARGS_ERROR_REQUIRED_STRING_ARG; @@ -268,7 +282,8 @@ void args_print_usage(args_option_t* options, int help_alignment) if (options->long_name != NULL) len += _stprintf(buffer + len, _T("--%s"), options->long_name); - if (options->type == ARGS_OPT_STRING || options->type == ARGS_OPT_INTEGER) + if (options->type == ARGS_OPT_STRING || options->type == ARGS_OPT_LONG || + options->type == ARGS_OPT_LONG_LONG) { len += _stprintf( buffer + len, diff --git a/cli/args.h b/cli/args.h index eca4bba6e5..e631219240 100644 --- a/cli/args.h +++ b/cli/args.h @@ -33,57 +33,59 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "unicode.h" - #ifdef __cplusplus extern "C" { #endif -typedef enum _args_error_type -{ + typedef enum _args_error_type + { ARGS_ERROR_OK, ARGS_ERROR_UNKNOWN_OPT, ARGS_ERROR_TOO_MANY, ARGS_ERROR_REQUIRED_INTEGER_ARG, ARGS_ERROR_REQUIRED_STRING_ARG, ARGS_ERROR_UNEXPECTED_ARG, -} args_error_type_t; - - -typedef enum _args_option_type -{ - // special - ARGS_OPT_END, - ARGS_OPT_GROUP, - // options with no arguments - ARGS_OPT_BOOLEAN, - // options with arguments (optional or required) - ARGS_OPT_INTEGER, - ARGS_OPT_STRING, -} args_option_type_t; - - -typedef struct _args_option -{ - args_option_type_t type; - const char_t short_name; - const char_t *long_name; - void *value; - int max_count; - const char_t *help; - const char_t *type_help; - int count; -} args_option_t; - + } args_error_type_t; + + typedef enum _args_option_type + { + // special + ARGS_OPT_END, + ARGS_OPT_GROUP, + // options with no arguments + ARGS_OPT_BOOLEAN, + // options with arguments (optional or required) + 
ARGS_OPT_LONG, + ARGS_OPT_LONG_LONG, + ARGS_OPT_STRING, + } args_option_type_t; + + typedef struct _args_option + { + args_option_type_t type; + const char_t short_name; + const char_t *long_name; + void *value; + int max_count; + const char_t *help; + const char_t *type_help; + int count; + } args_option_t; #define OPT_BOOLEAN(short_name, long_name, value, ...) \ { \ ARGS_OPT_BOOLEAN, short_name, long_name, value, 1, __VA_ARGS__ \ } -#define OPT_INTEGER(short_name, long_name, value, ...) \ - { \ - ARGS_OPT_INTEGER, short_name, long_name, value, 1, __VA_ARGS__ \ +#define OPT_LONG(short_name, long_name, value, ...) \ + { \ + ARGS_OPT_LONG, short_name, long_name, value, 1, __VA_ARGS__ \ + } + +#define OPT_LONG_LONG(short_name, long_name, value, ...) \ + { \ + ARGS_OPT_LONG_LONG, short_name, long_name, value, 1, __VA_ARGS__ \ } #define OPT_STRING_MULTI(short_name, long_name, value, max_count, ...) \ @@ -99,17 +101,11 @@ typedef struct _args_option ARGS_OPT_END, 0 \ } -int args_parse( - args_option_t *options, - int argc, - const char_t **argv); + int args_parse(args_option_t *options, int argc, const char_t **argv); -void args_print_usage( - args_option_t *options, - int alignment); + void args_print_usage(args_option_t *options, int alignment); -void args_free( - args_option_t *options); + void args_free(args_option_t *options); #ifdef __cplusplus } diff --git a/cli/unicode.h b/cli/unicode.h index 5edd63ff9f..631c1f8d3e 100644 --- a/cli/unicode.h +++ b/cli/unicode.h @@ -33,14 +33,14 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef _MSC_VER #include #define char_t TCHAR -#define PF_S "hs" -#define PF_C "hc" +#define PF_S "hs" +#define PF_C "hc" #else #define char_t char -#define _T(x) x -#define PF_S "s" -#define PF_C "c" +#define _T(x) x +#define PF_S "s" +#define PF_C "c" #ifdef __CYGWIN__ #define _tcstok_s strtok_r @@ -48,20 +48,21 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define _tcstok_s strtok_s #endif -#define _tcscmp strcmp -#define _tcsdup strdup -#define _tcschr strchr -#define _tcslen strlen -#define _tcsstr strstr -#define _tcstol strtol -#define _tstoi atoi -#define _tstof atof -#define _tisdigit isdigit -#define _tfopen fopen -#define _ftprintf fprintf -#define _stprintf sprintf -#define _tprintf printf -#define _tmain main +#define _tcscmp strcmp +#define _tcsdup strdup +#define _tcschr strchr +#define _tcslen strlen +#define _tcsstr strstr +#define _tcstol strtol +#define _tcstoll strtoll +#define _tstoi atoi +#define _tstof atof +#define _tisdigit isdigit +#define _tfopen fopen +#define _ftprintf fprintf +#define _stprintf sprintf +#define _tprintf printf +#define _tmain main #define _sntprintf snprintf #endif diff --git a/cli/yara.c b/cli/yara.c index 598fc91ab9..f8857a0c20 100644 --- a/cli/yara.c +++ b/cli/yara.c @@ -161,10 +161,10 @@ static long total_count = 0; static long limit = 0; static long timeout = 1000000; static long stack_size = DEFAULT_STACK_SIZE; -static long skip_larger = 0; static long threads = YR_MAX_THREADS; static long max_strings_per_rule = DEFAULT_MAX_STRINGS_PER_RULE; static long max_process_memory_chunk = DEFAULT_MAX_PROCESS_MEMORY_CHUNK; +static long long skip_larger = 0; #define USAGE_STRING \ "Usage: yara [OPTION]... [NAMESPACE:]RULES_FILE... 
FILE | DIR | PID" @@ -215,7 +215,7 @@ args_option_t options[] = { _T("print only rules named IDENTIFIER"), _T("IDENTIFIER")), - OPT_INTEGER( + OPT_LONG( 0, _T("max-process-memory-chunk"), &max_process_memory_chunk, @@ -223,14 +223,14 @@ args_option_t options[] = { _T(" (default=1073741824)"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 'l', _T("max-rules"), &limit, _T("abort scanning after matching a NUMBER of rules"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 0, _T("max-strings-per-rule"), &max_strings_per_rule, @@ -310,14 +310,14 @@ args_option_t options[] = { &scan_list_search, _T("scan files listed in FILE, one per line")), - OPT_INTEGER( + OPT_LONG_LONG( 'z', _T("skip-larger"), &skip_larger, _T("skip files larger than the given size when scanning a directory"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 'k', _T("stack-size"), &stack_size, @@ -332,14 +332,14 @@ args_option_t options[] = { _T("print only rules tagged as TAG"), _T("TAG")), - OPT_INTEGER( + OPT_LONG( 'p', _T("threads"), &threads, _T("use the specified NUMBER of threads to scan a directory"), _T("NUMBER")), - OPT_INTEGER( + OPT_LONG( 'a', _T("timeout"), &timeout, @@ -478,7 +478,25 @@ static int scan_dir(const char_t* dir, SCAN_OPTIONS* scan_opts) if (!(FindFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - result = file_queue_put(path, scan_opts->deadline); + LARGE_INTEGER file_size; + + file_size.HighPart = FindFileData.nFileSizeHigh; + file_size.LowPart = FindFileData.nFileSizeLow; + + if (skip_larger > file_size.QuadPart || skip_larger <= 0) + { + result = file_queue_put(path, scan_opts->deadline); + } + else + { + _ftprintf( + stderr, + _T("skipping %s (%" PRIu64 + " bytes) because it's larger than %lld bytes.\n"), + path, + file_size.QuadPart, + skip_larger); + } } else if ( scan_opts->recursive_search && @@ -670,7 +688,7 @@ static int scan_dir(const char* dir, SCAN_OPTIONS* scan_opts) { fprintf( stderr, - "skipping %s (%" PRId64 " bytes) because it's larger than %ld" + "skipping %s (%" 
PRId64 " bytes) because it's larger than %lld" " bytes.\n", full_path, st.st_size, @@ -1199,7 +1217,7 @@ static int callback( return CALLBACK_CONTINUE; case CALLBACK_MSG_CONSOLE_LOG: - _tprintf(_T("%"PF_S"\n"), (char*) message_data); + _tprintf(_T("%" PF_S "\n"), (char*) message_data); return CALLBACK_CONTINUE; } @@ -1393,10 +1411,10 @@ int _tmain(int argc, const char_t** argv) exit_with_code(EXIT_FAILURE); } - yr_set_configuration_uint32(YR_CONFIG_STACK_SIZE, stack_size); + yr_set_configuration_uint32(YR_CONFIG_STACK_SIZE, (uint32_t) stack_size); yr_set_configuration_uint32( - YR_CONFIG_MAX_STRINGS_PER_RULE, max_strings_per_rule); + YR_CONFIG_MAX_STRINGS_PER_RULE, (uint32_t) max_strings_per_rule); yr_set_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, max_process_memory_chunk); diff --git a/cli/yarac.c b/cli/yarac.c index ea3778eb7a..8144077400 100644 --- a/cli/yarac.c +++ b/cli/yarac.c @@ -103,7 +103,7 @@ args_option_t options[] = { OPT_BOOLEAN('h', _T("help"), &show_help, _T("show this help and exit")), - OPT_INTEGER( + OPT_LONG( 0, _T("max-strings-per-rule"), &max_strings_per_rule, diff --git a/configure.ac b/configure.ac index 8df0c8f0c1..282e7845e6 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([yara], [4.2.0], [vmalvarez@virustotal.com]) +AC_INIT([yara], [4.2.3], [vmalvarez@virustotal.com]) AM_SILENT_RULES([yes]) AC_CONFIG_SRCDIR([cli/yara.c]) diff --git a/libyara/arena.c b/libyara/arena.c index 04d0006b68..784d9ad972 100644 --- a/libyara/arena.c +++ b/libyara/arena.c @@ -434,8 +434,8 @@ int yr_arena_ptr_to_ref(YR_ARENA* arena, const void* address, YR_ARENA_REF* ref) (uint8_t*) address < arena->buffers[i].data + arena->buffers[i].used) { ref->buffer_id = i; - ref->offset = (yr_arena_off_t)( - (uint8_t*) address - arena->buffers[i].data); + ref->offset = + (yr_arena_off_t) ((uint8_t*) address - arena->buffers[i].data); return 1; } @@ -583,7 +583,7 @@ int yr_arena_load_stream(YR_STREAM* stream, YR_ARENA** arena) 
YR_ARENA_BUFFER* b = &new_arena->buffers[ref.buffer_id]; if (ref.buffer_id >= new_arena->num_buffers || - ref.offset > b->used - sizeof(void*)) + ref.offset > b->used - sizeof(void*) || b->data == NULL) { yr_arena_release(new_arena); return ERROR_CORRUPT_FILE; diff --git a/libyara/compiler.c b/libyara/compiler.c index 34f141294d..4bfbf9bce0 100644 --- a/libyara/compiler.c +++ b/libyara/compiler.c @@ -286,22 +286,28 @@ YR_API int yr_compiler_create(YR_COMPILER** compiler) YR_API void yr_compiler_destroy(YR_COMPILER* compiler) { - yr_arena_release(compiler->arena); + if (compiler->arena != NULL) + yr_arena_release(compiler->arena); if (compiler->automaton != NULL) yr_ac_automaton_destroy(compiler->automaton); - yr_hash_table_destroy(compiler->rules_table, NULL); + if (compiler->rules_table != NULL) + yr_hash_table_destroy(compiler->rules_table, NULL); - yr_hash_table_destroy(compiler->strings_table, NULL); + if (compiler->strings_table != NULL) + yr_hash_table_destroy(compiler->strings_table, NULL); - yr_hash_table_destroy(compiler->wildcard_identifiers_table, NULL); + if (compiler->wildcard_identifiers_table != NULL) + yr_hash_table_destroy(compiler->wildcard_identifiers_table, NULL); - yr_hash_table_destroy(compiler->sz_table, NULL); + if (compiler->sz_table != NULL) + yr_hash_table_destroy(compiler->sz_table, NULL); - yr_hash_table_destroy( - compiler->objects_table, - (YR_HASH_TABLE_FREE_VALUE_FUNC) yr_object_destroy); + if (compiler->objects_table != NULL) + yr_hash_table_destroy( + compiler->objects_table, + (YR_HASH_TABLE_FREE_VALUE_FUNC) yr_object_destroy); if (compiler->atoms_config.free_quality_table) yr_free(compiler->atoms_config.quality_table); diff --git a/libyara/exec.c b/libyara/exec.c index 1a9b2c1c04..2cbd85599d 100644 --- a/libyara/exec.c +++ b/libyara/exec.c @@ -326,6 +326,22 @@ static int iter_int_enum_next(YR_ITERATOR* self, YR_VALUE_STACK* stack) return ERROR_SUCCESS; } +// Global table that contains the "next" function for different types of 
+// iterators. The reason for using this table is to avoid storing pointers +// in the YARA's VM stack. Instead of the pointers we store an index within +// this table. +static YR_ITERATOR_NEXT_FUNC iter_next_func_table[] = { + iter_array_next, + iter_dict_next, + iter_int_range_next, + iter_int_enum_next, +}; + +#define ITER_NEXT_ARRAY 0 +#define ITER_NEXT_DICT 1 +#define ITER_NEXT_INT_RANGE 2 +#define ITER_NEXT_INT_ENUM 3 + int yr_execute_code(YR_SCAN_CONTEXT* context) { YR_DEBUG_FPRINTF(2, stderr, "+ %s() {\n", __FUNCTION__); @@ -428,7 +444,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r1); r2.it->array_it.array = r1.o; r2.it->array_it.index = 0; - r2.it->next = iter_array_next; + r2.it->next_func_idx = ITER_NEXT_ARRAY; push(r2); } @@ -449,7 +465,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r1); r2.it->dict_it.dict = r1.o; r2.it->dict_it.index = 0; - r2.it->next = iter_dict_next; + r2.it->next_func_idx = ITER_NEXT_DICT; push(r2); } @@ -473,7 +489,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r1); r3.it->int_range_it.next = r1.i; r3.it->int_range_it.last = r2.i; - r3.it->next = iter_int_range_next; + r3.it->next_func_idx = ITER_NEXT_INT_RANGE; push(r3); } @@ -499,7 +515,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) { r3.it->int_enum_it.count = r1.i; r3.it->int_enum_it.next = 0; - r3.it->next = iter_int_enum_next; + r3.it->next_func_idx = ITER_NEXT_INT_ENUM; for (int64_t i = r1.i; i > 0; i--) { @@ -519,11 +535,22 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) // Loads the iterator in r1, but leaves the iterator in the stack. pop(r1); push(r1); - // The iterator's next function is responsible for pushing the next - // item in the stack, and a boolean indicating if there are more items - // to retrieve. The boolean will be at the top of the stack after - // calling "next". 
- result = r1.it->next(r1.it, &stack); + + if (r1.it->next_func_idx < + sizeof(iter_next_func_table) / sizeof(YR_ITERATOR_NEXT_FUNC)) + { + // The iterator's next function is responsible for pushing the next + // item in the stack, and a boolean indicating if there are more items + // to retrieve. The boolean will be at the top of the stack after + // calling "next". + result = iter_next_func_table[r1.it->next_func_idx](r1.it, &stack); + } + else + { + // next_func_idx is outside the valid range, this should not happen. + result = ERROR_INTERNAL_FATAL_ERROR; + } + stop = (result != ERROR_SUCCESS); break; @@ -1388,10 +1415,24 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) { YR_DEBUG_FPRINTF(2, stderr, "- case OP_OF: // %s()\n", __FUNCTION__); + // Quantifier is "all" if (is_undef(r2)) + { r1.i = found >= count ? 1 : 0; + } + // Quantifier is 0 or none. This is a special case in which we want + // exactly 0 strings matching. More information at: + // https://github.com/VirusTotal/yara/issues/1695 + else if (r2.i == 0) + { + r1.i = found == 0 ? 1 : 0; + } + // In all other cases the number of strings matching should be at + // least the amount specified by the quantifier. else + { r1.i = found >= r2.i ? 1 : 0; + } } else // OP_OF_PERCENT { @@ -1416,12 +1457,23 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) found = 0; count = 0; - pop(r2); - pop(r1); - ensure_defined(r1); - ensure_defined(r2); - pop(r3); + pop(r2); // Offset range end + pop(r1); // Offset range start + pop(r3); // First string + + // If any of the range boundaries are undefined the result is also + // undefined, but we need to unwind the stack first. + if (is_undef(r1) || is_undef(r2)) + { + // Remove all the strings. + while (!is_undef(r3)) pop(r3); + // Remove the quantifier at the bottom of the stack. 
+ pop(r3); + r1.i = YR_UNDEFINED; + push(r1); + break; + } while (!is_undef(r3)) { @@ -1432,6 +1484,7 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) while (match != NULL) { + // String match within range start and range end? if (match->base + match->offset >= r1.i && match->base + match->offset <= r2.i) { @@ -1439,6 +1492,9 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) break; } + // If current match is past range end, we can stop as matches + // are sorted by offset in increasing order, so all remaining + // matches are past the range end too. if (match->base + match->offset > r1.i) break; @@ -1449,11 +1505,26 @@ int yr_execute_code(YR_SCAN_CONTEXT* context) pop(r3); } - pop(r1); - if (is_undef(r1)) + pop(r2); // Quantifier X in expressions like "X of string_set in range" + + // Quantifier is "all". + if (is_undef(r2)) + { r1.i = found >= count ? 1 : 0; + } + // Quantifier is 0 or none. This is a special case in which we want + // exactly 0 strings matching. More information at: + // https://github.com/VirusTotal/yara/issues/1695 + else if (r2.i == 0) + { + r1.i = found == 0 ? 1 : 0; + } + // In all other cases the number of strings matching should be at least + // the amount specified by the quantifier. else - r1.i = found >= r1.i ? 1 : 0; + { + r1.i = found >= r2.i ? 
1 : 0; + } push(r1); break; diff --git a/libyara/hex_lexer.c b/libyara/hex_lexer.c index dfe5a921d4..f3b26343bd 100644 --- a/libyara/hex_lexer.c +++ b/libyara/hex_lexer.c @@ -2484,7 +2484,13 @@ int yr_parse_hex_string( (*re_ast)->flags |= RE_FLAGS_DOT_ALL; - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(hex_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/hex_lexer.l b/libyara/hex_lexer.l index b913a9f925..05a1e32f08 100644 --- a/libyara/hex_lexer.l +++ b/libyara/hex_lexer.l @@ -262,7 +262,13 @@ int yr_parse_hex_string( (*re_ast)->flags |= RE_FLAGS_DOT_ALL; - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(hex_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/include/yara/libyara.h b/libyara/include/yara/libyara.h index e7b7323af2..323282c102 100644 --- a/libyara/include/yara/libyara.h +++ b/libyara/include/yara/libyara.h @@ -34,7 +34,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define YR_MAJOR_VERSION 4 #define YR_MINOR_VERSION 2 -#define YR_MICRO_VERSION 0 +#define YR_MICRO_VERSION 3 #define version_str(s) _version_str(s) #define _version_str(s) #s diff --git a/libyara/include/yara/types.h b/libyara/include/yara/types.h index f1d63c6c63..e823068f1f 100644 --- a/libyara/include/yara/types.h +++ b/libyara/include/yara/types.h @@ -397,12 +397,21 @@ struct RE_AST #pragma warning(disable : 4200) #endif +// The RE structure is embedded in the YARA's VM instruction flow, which +// means that its alignment is not guaranteed. For this reason it must +// be a "packed" structure, in order to prevent alignment issues in platforms +// with strict alignment constraints. 
+#pragma pack(push) +#pragma pack(1) + struct RE { uint32_t flags; uint8_t code[0]; }; +#pragma pack(pop) + #ifdef _MSC_VER #pragma warning(pop) #endif @@ -979,7 +988,8 @@ struct YR_INT_ENUM_ITERATOR struct YR_ITERATOR { - YR_ITERATOR_NEXT_FUNC next; + // Index of the next function within the iter_next_func_table global array. + int next_func_idx; union { diff --git a/libyara/lexer.c b/libyara/lexer.c index 1a8ed11fbf..3e1f977767 100644 --- a/libyara/lexer.c +++ b/libyara/lexer.c @@ -3588,10 +3588,15 @@ int yr_lex_parse_rules_string( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -3615,10 +3620,15 @@ int yr_lex_parse_rules_file( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -3684,7 +3694,13 @@ int yr_lex_parse_rules_fd( return compiler->errors; } - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_free(buffer); + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; + return compiler->errors; + } #if YYDEBUG yydebug = 1; diff --git a/libyara/lexer.l b/libyara/lexer.l index 5556f96952..637efc970e 100644 --- a/libyara/lexer.l +++ b/libyara/lexer.l @@ -896,10 +896,15 @@ int yr_lex_parse_rules_string( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if 
(setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -923,10 +928,15 @@ int yr_lex_parse_rules_file( compiler->errors = 0; - if (setjmp(compiler->error_recovery) != 0) + if (yylex_init(&yyscanner) != 0) + { + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; return compiler->errors; + } - yylex_init(&yyscanner); + if (setjmp(compiler->error_recovery) != 0) + return compiler->errors; #if YYDEBUG yydebug = 1; @@ -992,7 +1002,13 @@ int yr_lex_parse_rules_fd( return compiler->errors; } - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_free(buffer); + compiler->errors = 1; + compiler->last_error = ERROR_INSUFFICIENT_MEMORY; + return compiler->errors; + } #if YYDEBUG yydebug = 1; diff --git a/libyara/modules.c b/libyara/modules.c index 092fbf4f56..af43905082 100644 --- a/libyara/modules.c +++ b/libyara/modules.c @@ -177,9 +177,7 @@ int yr_modules_load(const char* module_name, YR_SCAN_CONTEXT* context) int yr_modules_unload_all(YR_SCAN_CONTEXT* context) { - int i; - - for (i = 0; i < sizeof(yr_modules_table) / sizeof(YR_MODULE); i++) + for (int i = 0; i < sizeof(yr_modules_table) / sizeof(YR_MODULE); i++) { YR_OBJECT* module_structure = (YR_OBJECT*) yr_hash_table_remove( context->objects_table, yr_modules_table[i].name, NULL); diff --git a/libyara/modules/dex/dex.c b/libyara/modules/dex/dex.c index 6c9fd499d5..1fbec8278e 100644 --- a/libyara/modules/dex/dex.c +++ b/libyara/modules/dex/dex.c @@ -418,7 +418,7 @@ static int64_t dex_get_integer( const char* pattern, int64_t index) { - if (index == YR_UNDEFINED) + if (index == YR_UNDEFINED || index < 0) return YR_UNDEFINED; // Impose a reasonably large limit to table indexes. @@ -434,7 +434,7 @@ static SIZED_STRING* dex_get_string( const char* pattern, int64_t index) { - if (index == YR_UNDEFINED) + if (index == YR_UNDEFINED || index < 0) return NULL; // Impose a reasonably large limit to table indexes. 
@@ -619,7 +619,7 @@ uint32_t load_encoded_field( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tFIELD_NAME %s NAME_IDX 0x%x\n", field_name->c_string, name_idx); + "[DEX]\tFIELD_NAME %s NAME_IDX 0x%llx\n", field_name->c_string, name_idx); #endif set_sized_string( @@ -643,7 +643,7 @@ uint32_t load_encoded_field( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%x DESCRIPTOR_IDX 0x%x\n", + "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%llx DESCRIPTOR_IDX 0x%llx\n", class_name->c_string, class_idx, descriptor_idx); @@ -748,7 +748,7 @@ uint32_t load_encoded_method( return 0; #ifdef DEBUG_DEX_MODULE - printf("[DEX]\tNAME_IDX 0x%x\n", name_idx); + printf("[DEX]\tNAME_IDX 0x%llx\n", name_idx); #endif #ifdef DEBUG_DEX_MODULE @@ -768,7 +768,7 @@ uint32_t load_encoded_method( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tMETHOD_NAME %s NAME_IDX 0x%x\n", + "[DEX]\tMETHOD_NAME %s NAME_IDX 0x%llx\n", method_name->c_string, name_idx); #endif @@ -794,7 +794,7 @@ uint32_t load_encoded_method( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%x DESCRIPTOR_IDX:0x%x\n", + "[DEX]\tCLASS_NAME %s CLASS_IDX 0x%llx DESCRIPTOR_IDX:0x%llx\n", class_name->c_string, class_idx, descriptor_idx); @@ -821,7 +821,7 @@ uint32_t load_encoded_method( { #ifdef DEBUG_DEX_MODULE printf( - "[DEX]\tPROTO_NAME %s CLASS_IDX 0x%x DESCRIPTOR_IDX:0x%x\n", + "[DEX]\tPROTO_NAME %s CLASS_IDX 0x%llx DESCRIPTOR_IDX:0x%llx\n", proto_name->c_string, class_idx, descriptor_idx); @@ -842,7 +842,7 @@ uint32_t load_encoded_method( #endif if (struct_fits_in_dex( - dex, dex->data + encoded_method.code_off, sizeof(code_item_t))) + dex, dex->data + encoded_method.code_off, code_item_t)) { code_item_t* code_item = (code_item_t*) (dex->data + encoded_method.code_off); @@ -954,7 +954,7 @@ void dex_parse(DEX* dex, uint64_t base_address) if (!fits_in_dex( dex, - dex->data + yr_le32toh(string_id_item->string_data_offset), + dex->data + yr_le32toh(string_id_item->string_data_offset) + 1, value)) continue; @@ 
-967,8 +967,8 @@ void dex_parse(DEX* dex, uint64_t base_address) set_integer(value, dex->object, "string_ids[%i].size", i); set_sized_string( - (const char*) (( - dex->data + yr_le32toh(string_id_item->string_data_offset) + 1)), + (const char*) ( + dex->data + yr_le32toh(string_id_item->string_data_offset) + 1), value, dex->object, "string_ids[%i].value", @@ -1124,6 +1124,9 @@ void dex_parse(DEX* dex, uint64_t base_address) map_item_t* map_item = (map_item_t*) (dex->data + yr_le32toh(dex_header->map_offset) + sizeof(uint32_t) + i * sizeof(map_item_t)); + if (!struct_fits_in_dex(dex, map_item, map_item_t)) + return; + set_integer( yr_le16toh(map_item->type), dex->object, diff --git a/libyara/modules/dotnet/dotnet.c b/libyara/modules/dotnet/dotnet.c index 112e6fb092..8e1b7a2f56 100644 --- a/libyara/modules/dotnet/dotnet.c +++ b/libyara/modules/dotnet/dotnet.c @@ -1637,6 +1637,7 @@ static bool dotnet_is_dotnet(PE* pe) int64_t metadata_root = pe_rva_to_offset( pe, yr_le32toh(cli_header->MetaData.VirtualAddress)); + offset = metadata_root; if (!struct_fits_in_pe(pe, pe->data + metadata_root, NET_METADATA)) return false; @@ -1650,7 +1651,7 @@ static bool dotnet_is_dotnet(PE* pe) // Also make sure it fits in pe. 
uint32_t md_len = yr_le32toh(metadata->Length); if (md_len == 0 || md_len > 255 || md_len % 4 != 0 || - !fits_in_pe(pe, pe->data + offset, md_len)) + !fits_in_pe(pe, pe->data + offset + sizeof(NET_METADATA), md_len)) { return false; } @@ -1667,7 +1668,7 @@ static bool dotnet_is_dotnet(PE* pe) int64_t entry_offset = pe_rva_to_offset( pe, yr_le32toh(pe->header->OptionalHeader.AddressOfEntryPoint)); - if (offset < 0 || !fits_in_pe(pe, pe->data + entry_offset, 2)) + if (entry_offset < 0 || !fits_in_pe(pe, pe->data + entry_offset, 2)) return false; const uint8_t* entry_data = pe->data + entry_offset; @@ -1721,7 +1722,7 @@ void dotnet_parse_com(PE* pe) md_len = yr_le32toh(metadata->Length); if (md_len == 0 || md_len > 255 || md_len % 4 != 0 || - !fits_in_pe(pe, pe->data + offset, md_len)) + !fits_in_pe(pe, pe->data + offset + sizeof(NET_METADATA), md_len)) { return; } diff --git a/libyara/modules/hash/hash.c b/libyara/modules/hash/hash.c index caff0fc07a..003fa94d3c 100644 --- a/libyara/modules/hash/hash.c +++ b/libyara/modules/hash/hash.c @@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define MODULE_NAME hash - typedef struct _CACHE_KEY { int64_t offset; @@ -43,7 +42,6 @@ typedef struct _CACHE_KEY } CACHE_KEY; - const uint32_t crc32_tab[] = { 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, @@ -89,7 +87,6 @@ const uint32_t crc32_tab[] = { 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d}; - static void digest_to_ascii( unsigned char* digest, char* digest_ascii, @@ -103,7 +100,6 @@ static void digest_to_ascii( digest_ascii[digest_length * 2] = '\0'; } - static char* get_from_cache( YR_OBJECT* module_object, const char* ns, @@ -131,7 +127,6 @@ static char* get_from_cache( return result; } - static int add_to_cache( YR_OBJECT* module_object, const char* ns, @@ -166,7 +161,6 @@ static int add_to_cache( return result; } - define_function(string_md5) { unsigned char digest[YR_MD5_LEN]; @@ -192,7 +186,6 @@ define_function(string_md5) return_string(digest_ascii); } - define_function(string_sha256) { unsigned char digest[YR_SHA256_LEN]; @@ -218,7 +211,6 @@ define_function(string_sha256) return_string(digest_ascii); } - define_function(string_sha1) { unsigned char digest[YR_SHA1_LEN]; @@ -244,7 +236,6 @@ define_function(string_sha1) return_string(digest_ascii); } - define_function(string_checksum32) { size_t i; @@ -252,7 +243,7 @@ define_function(string_checksum32) SIZED_STRING* s = sized_string_argument(1); uint32_t checksum = 0; - for (i = 0; i < s->length; i++) checksum += (uint8_t)(s->c_string[i]); + for (i = 0; i < s->length; i++) checksum += (uint8_t) (s->c_string[i]); YR_DEBUG_FPRINTF( 2, @@ -265,7 +256,6 @@ define_function(string_checksum32) return_integer(checksum); } - define_function(data_md5) { yr_md5_ctx md5_context; @@ -298,11 +288,10 @@ define_function(data_md5) { YR_DEBUG_FPRINTF( 2, stderr, "} // %s() = YR_UNDEFINED // block == NULL\n", __FUNCTION__); + 
return_string(YR_UNDEFINED); } - yr_md5_init(&md5_context); - if (offset < 0 || length < 0 || offset < block->base) { YR_DEBUG_FPRINTF( @@ -310,6 +299,7 @@ define_function(data_md5) stderr, "} // %s() = YR_UNDEFINED // bad offset / length\n", __FUNCTION__); + return_string(YR_UNDEFINED); } @@ -323,9 +313,12 @@ define_function(data_md5) "} // %s() = %s (cached)\n", __FUNCTION__, cached_ascii_digest); + return_string(cached_ascii_digest); } + yr_md5_init(&md5_context); + foreach_memory_block(iterator, block) { // if desired block within current block @@ -336,9 +329,9 @@ define_function(data_md5) if (block_data != NULL) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( - length, (size_t)(block->size - data_offset)); + length, (size_t) (block->size - data_offset)); offset += data_len; length -= data_len; @@ -361,6 +354,9 @@ define_function(data_md5) stderr, "} // %s() = YR_UNDEFINED // past_first_block\n", __FUNCTION__); + + yr_md5_final(digest, &md5_context); + return_string(YR_UNDEFINED); } @@ -368,6 +364,8 @@ define_function(data_md5) break; } + yr_md5_final(digest, &md5_context); + if (!past_first_block) { YR_DEBUG_FPRINTF( @@ -375,11 +373,10 @@ define_function(data_md5) stderr, "} // %s() = YR_UNDEFINED // !past_first_block\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_md5_final(digest, &md5_context); - digest_to_ascii(digest, digest_ascii, YR_MD5_LEN); FAIL_ON_ERROR( @@ -389,7 +386,6 @@ define_function(data_md5) return_string(digest_ascii); } - define_function(data_sha1) { yr_sha1_ctx sha_context; @@ -422,11 +418,10 @@ define_function(data_sha1) { YR_DEBUG_FPRINTF( 2, stderr, "} // %s() = YR_UNDEFINED // block == NULL\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha1_init(&sha_context); - if (offset < 0 || length < 0 || offset < block->base) { YR_DEBUG_FPRINTF( @@ -434,6 +429,7 @@ define_function(data_sha1) stderr, "} // %s() = YR_UNDEFINED // 
bad offset / length\n", __FUNCTION__); + return_string(YR_UNDEFINED); } @@ -448,9 +444,12 @@ define_function(data_sha1) "} // %s() = %s (cached)\n", __FUNCTION__, cached_ascii_digest); + return_string(cached_ascii_digest); } + yr_sha1_init(&sha_context); + foreach_memory_block(iterator, block) { // if desired block within current block @@ -460,7 +459,7 @@ define_function(data_sha1) if (block_data != NULL) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min( length, (size_t) block->size - data_offset); @@ -485,6 +484,8 @@ define_function(data_sha1) stderr, "} // %s() = YR_UNDEFINED // past_first_block\n", __FUNCTION__); + + yr_sha1_final(digest, &sha_context); return_string(YR_UNDEFINED); } @@ -492,6 +493,8 @@ define_function(data_sha1) break; } + yr_sha1_final(digest, &sha_context); + if (!past_first_block) { YR_DEBUG_FPRINTF( @@ -499,11 +502,10 @@ define_function(data_sha1) stderr, "} // %s() = YR_UNDEFINED // !past_first_block\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha1_final(digest, &sha_context); - digest_to_ascii(digest, digest_ascii, YR_SHA1_LEN); FAIL_ON_ERROR( @@ -513,7 +515,6 @@ define_function(data_sha1) return_string(digest_ascii); } - define_function(data_sha256) { yr_sha256_ctx sha256_context; @@ -546,11 +547,10 @@ define_function(data_sha256) { YR_DEBUG_FPRINTF( 2, stderr, "} // %s() = YR_UNDEFINED // block == NULL\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha256_init(&sha256_context); - if (offset < 0 || length < 0 || offset < block->base) { YR_DEBUG_FPRINTF( @@ -558,6 +558,7 @@ define_function(data_sha256) stderr, "} // %s() = YR_UNDEFINED // bad offset / length\n", __FUNCTION__); + return_string(YR_UNDEFINED); } @@ -572,9 +573,12 @@ define_function(data_sha256) "} // %s() = %s (cached)\n", __FUNCTION__, cached_ascii_digest); + return_string(cached_ascii_digest); } + yr_sha256_init(&sha256_context); + 
foreach_memory_block(iterator, block) { // if desired block within current block @@ -584,7 +588,7 @@ define_function(data_sha256) if (block_data != NULL) { - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min(length, block->size - data_offset); offset += data_len; @@ -608,6 +612,8 @@ define_function(data_sha256) stderr, "} // %s() = YR_UNDEFINED // past_first_block\n", __FUNCTION__); + + yr_sha256_final(digest, &sha256_context); return_string(YR_UNDEFINED); } @@ -615,6 +621,8 @@ define_function(data_sha256) break; } + yr_sha256_final(digest, &sha256_context); + if (!past_first_block) { YR_DEBUG_FPRINTF( @@ -622,11 +630,10 @@ define_function(data_sha256) stderr, "} // %s() = YR_UNDEFINED // !past_first_block\n", __FUNCTION__); + return_string(YR_UNDEFINED); } - yr_sha256_final(digest, &sha256_context); - digest_to_ascii(digest, digest_ascii, YR_SHA256_LEN); FAIL_ON_ERROR( @@ -636,7 +643,6 @@ define_function(data_sha256) return_string(digest_ascii); } - define_function(data_checksum32) { int64_t offset = integer_argument(1); // offset where to start @@ -673,7 +679,7 @@ define_function(data_checksum32) { size_t i; - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min(length, block->size - data_offset); offset += data_len; @@ -707,7 +713,6 @@ define_function(data_checksum32) return_integer(checksum); } - define_function(string_crc32) { size_t i; @@ -729,7 +734,6 @@ define_function(string_crc32) return_integer(checksum ^ 0xFFFFFFFF); } - define_function(data_crc32) { int64_t offset = integer_argument(1); // offset where to start @@ -766,7 +770,7 @@ define_function(data_crc32) { size_t i; - size_t data_offset = (size_t)(offset - block->base); + size_t data_offset = (size_t) (offset - block->base); size_t data_len = (size_t) yr_min(length, block->size - data_offset); offset += data_len; @@ 
-803,7 +807,6 @@ define_function(data_crc32) return_integer(checksum ^ 0xFFFFFFFF); } - begin_declarations declare_function("md5", "ii", "s", data_md5); declare_function("md5", "s", "s", string_md5); @@ -821,7 +824,6 @@ begin_declarations declare_function("crc32", "s", "i", string_crc32); end_declarations - int module_initialize(YR_MODULE* module) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {}\n", __FUNCTION__); @@ -829,7 +831,6 @@ int module_initialize(YR_MODULE* module) return ERROR_SUCCESS; } - int module_finalize(YR_MODULE* module) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {}\n", __FUNCTION__); @@ -837,7 +838,6 @@ int module_finalize(YR_MODULE* module) return ERROR_SUCCESS; } - int module_load( YR_SCAN_CONTEXT* context, YR_OBJECT* module_object, @@ -855,7 +855,6 @@ int module_load( return ERROR_SUCCESS; } - int module_unload(YR_OBJECT* module_object) { YR_DEBUG_FPRINTF(2, stderr, "- %s() {}\n", __FUNCTION__); diff --git a/libyara/modules/magic/magic.c b/libyara/modules/magic/magic.c index 7a72b7d5ef..bbaad804b7 100644 --- a/libyara/modules/magic/magic.c +++ b/libyara/modules/magic/magic.c @@ -39,11 +39,9 @@ The original idea and inspiration for this module comes from Armin Buescher. #define MODULE_NAME magic - // Thread-local storage key used to store a pointer to a MAGIC_CACHE struct. 
YR_THREAD_STORAGE_KEY magic_tls; - typedef struct { magic_t magic_cookie; @@ -52,7 +50,6 @@ typedef struct } MAGIC_CACHE; - static int get_cache(MAGIC_CACHE** cache) { *cache = (MAGIC_CACHE*) yr_thread_storage_get_value(&magic_tls); @@ -88,7 +85,6 @@ static int get_cache(MAGIC_CACHE** cache) return ERROR_SUCCESS; } - define_function(magic_mime_type) { YR_SCAN_CONTEXT* context = scan_context(); @@ -111,8 +107,8 @@ define_function(magic_mime_type) { magic_setflags(cache->magic_cookie, MAGIC_MIME_TYPE); - cache->cached_mime_type = magic_buffer( - cache->magic_cookie, block_data, block->size); + cache->cached_mime_type = yr_strdup( + magic_buffer(cache->magic_cookie, block_data, block->size)); } } @@ -122,7 +118,6 @@ define_function(magic_mime_type) return_string((char*) cache->cached_mime_type); } - define_function(magic_type) { MAGIC_CACHE* cache; @@ -145,8 +140,8 @@ define_function(magic_type) { magic_setflags(cache->magic_cookie, 0); - cache->cached_type = magic_buffer( - cache->magic_cookie, block_data, block->size); + cache->cached_type = yr_strdup( + magic_buffer(cache->magic_cookie, block_data, block->size)); } } @@ -161,13 +156,11 @@ begin_declarations declare_function("type", "", "s", magic_type); end_declarations - int module_initialize(YR_MODULE* module) { return yr_thread_storage_create(&magic_tls); } - int module_finalize(YR_MODULE* module) { MAGIC_CACHE* cache = (MAGIC_CACHE*) yr_thread_storage_get_value(&magic_tls); @@ -181,7 +174,6 @@ int module_finalize(YR_MODULE* module) return yr_thread_storage_destroy(&magic_tls); } - int module_load( YR_SCAN_CONTEXT* context, YR_OBJECT* module_object, @@ -191,13 +183,18 @@ int module_load( return ERROR_SUCCESS; } - int module_unload(YR_OBJECT* module) { MAGIC_CACHE* cache = (MAGIC_CACHE*) yr_thread_storage_get_value(&magic_tls); if (cache != NULL) { + if (cache->cached_type != NULL) + yr_free((void*) cache->cached_type); + + if (cache->cached_mime_type != NULL) + yr_free((void*) cache->cached_mime_type); + 
cache->cached_type = NULL; cache->cached_mime_type = NULL; } diff --git a/libyara/notebook.c b/libyara/notebook.c index 754c9ed8c9..6036865f0e 100644 --- a/libyara/notebook.c +++ b/libyara/notebook.c @@ -140,6 +140,13 @@ int yr_notebook_destroy(YR_NOTEBOOK* notebook) // void* yr_notebook_alloc(YR_NOTEBOOK* notebook, size_t size) { + // In ARM make sure the buffer's size is rounded up to a multiple of 4, + // which also implies that the returned pointers are aligned to 4 bytes. + +#if defined(__arm__) + size = (size + 3) & ~0x3; +#endif + // The requested memory size can't be larger than a notebook's page. assert(size <= notebook->page_size); @@ -160,17 +167,6 @@ void* yr_notebook_alloc(YR_NOTEBOOK* notebook, size_t size) void* ptr = notebook->page_list_head->data + notebook->page_list_head->used; -// In ARM make sure the alignment of the returned buffer is 4 bytes. -#if defined(__arm__) - uintptr_t misalignment = (uintptr_t) ptr & 3; - - if (misalignment) - { - size += 4 - misalignment; - ptr += 4 - misalignment; - } -#endif - notebook->page_list_head->used += size; return ptr; diff --git a/libyara/object.c b/libyara/object.c index 970cf5a9a0..359feabb60 100644 --- a/libyara/object.c +++ b/libyara/object.c @@ -41,6 +41,12 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +//////////////////////////////////////////////////////////////////////////////// +// Creates a new object with the given type and identifier. If a parent is +// specified the new object is owned by the parent and it will be destroyed when +// the parent is destroyed. You must not call yr_object_destroy on an object +// that has a parent; you should destroy the parent instead. 
+// int yr_object_create( int8_t type, const char* identifier, @@ -48,10 +54,10 @@ int yr_object_create( YR_OBJECT** object) { YR_OBJECT* obj; - int i; size_t object_size = 0; assert(parent != NULL || object != NULL); + assert(identifier != NULL); switch (type) { @@ -114,7 +120,7 @@ int yr_object_create( break; case OBJECT_TYPE_FUNCTION: object_as_function(obj)->return_obj = NULL; - for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) + for (int i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) { object_as_function(obj)->prototypes[i].arguments_fmt = NULL; object_as_function(obj)->prototypes[i].code = NULL; @@ -186,7 +192,6 @@ int yr_object_function_create( YR_OBJECT_FUNCTION* f = NULL; int8_t return_type; - int i; // The parent of a function must be a structure. assert(parent != NULL && parent->type == OBJECT_TYPE_STRUCTURE); @@ -219,14 +224,15 @@ int yr_object_function_create( FAIL_ON_ERROR( yr_object_create(OBJECT_TYPE_FUNCTION, identifier, parent, &o)); - FAIL_ON_ERROR_WITH_CLEANUP( - yr_object_create(return_type, "result", o, &return_obj), - yr_object_destroy(o)); + // In case of failure while creating return_obj we don't need to free the + // previously created "o" object, as it is already associated with its + // parent and will be destroyed when the parent is destroyed. + FAIL_ON_ERROR(yr_object_create(return_type, "result", o, &return_obj)); f = object_as_function(o); } - for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) + for (int i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) { if (f->prototypes[i].arguments_fmt == NULL) { @@ -306,6 +312,10 @@ int yr_object_from_external_variable( return result; } +//////////////////////////////////////////////////////////////////////////////// +// Destroys an object, and any other object that is a child of it. For example, +// destroying a struct will destroy all its members. 
+// void yr_object_destroy(YR_OBJECT* object) { YR_STRUCTURE_MEMBER* member; @@ -313,8 +323,6 @@ void yr_object_destroy(YR_OBJECT* object) YR_ARRAY_ITEMS* array_items; YR_DICTIONARY_ITEMS* dict_items; - int i; - if (object == NULL) return; @@ -345,7 +353,7 @@ void yr_object_destroy(YR_OBJECT* object) if (array_items != NULL) { - for (i = 0; i < array_items->length; i++) + for (int i = 0; i < array_items->length; i++) if (array_items->objects[i] != NULL) yr_object_destroy(array_items->objects[i]); } @@ -361,7 +369,7 @@ void yr_object_destroy(YR_OBJECT* object) if (dict_items != NULL) { - for (i = 0; i < dict_items->used; i++) + for (int i = 0; i < dict_items->used; i++) { if (dict_items->objects[i].key != NULL) yr_free(dict_items->objects[i].key); @@ -531,8 +539,6 @@ int yr_object_copy(YR_OBJECT* object, YR_OBJECT** object_copy) YR_STRUCTURE_MEMBER* structure_member; - int i; - *object_copy = NULL; FAIL_ON_ERROR( @@ -568,7 +574,7 @@ int yr_object_copy(YR_OBJECT* object, YR_OBJECT** object_copy) // cleanup yr_object_destroy(copy)); - for (i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) + for (int i = 0; i < YR_MAX_OVERLOADED_FUNCTIONS; i++) object_as_function(copy)->prototypes[i] = object_as_function(object)->prototypes[i]; @@ -691,7 +697,6 @@ int yr_object_array_set_item(YR_OBJECT* object, YR_OBJECT* item, int index) { YR_OBJECT_ARRAY* array; - int i; int capacity; assert(index >= 0); @@ -728,7 +733,7 @@ int yr_object_array_set_item(YR_OBJECT* object, YR_OBJECT* item, int index) if (array->items == NULL) return ERROR_INSUFFICIENT_MEMORY; - for (i = array->items->capacity; i < capacity; i++) + for (int i = array->items->capacity; i < capacity; i++) array->items->objects[i] = NULL; array->items->capacity = capacity; @@ -748,8 +753,6 @@ YR_OBJECT* yr_object_dict_get_item( int flags, const char* key) { - int i; - YR_OBJECT* result = NULL; YR_OBJECT_DICTIONARY* dict; @@ -759,7 +762,7 @@ YR_OBJECT* yr_object_dict_get_item( if (dict->items != NULL) { - for (i = 0; i < 
dict->items->used; i++) + for (int i = 0; i < dict->items->used; i++) { if (strcmp(dict->items->objects[i].key->c_string, key) == 0) result = dict->items->objects[i].obj; @@ -781,7 +784,6 @@ int yr_object_dict_set_item(YR_OBJECT* object, YR_OBJECT* item, const char* key) { YR_OBJECT_DICTIONARY* dict; - int i; int count; assert(object->type == OBJECT_TYPE_DICTIONARY); @@ -813,7 +815,7 @@ int yr_object_dict_set_item(YR_OBJECT* object, YR_OBJECT* item, const char* key) if (dict->items == NULL) return ERROR_INSUFFICIENT_MEMORY; - for (i = dict->items->used; i < count; i++) + for (int i = dict->items->used; i < count; i++) { dict->items->objects[i].key = NULL; dict->items->objects[i].obj = NULL; @@ -1072,7 +1074,6 @@ YR_API void yr_object_print_data( YR_STRUCTURE_MEMBER* member; char indent_spaces[32]; - int i; indent = yr_min(indent, sizeof(indent_spaces) - 1); @@ -1105,10 +1106,9 @@ YR_API void yr_object_print_data( if (object->value.ss != NULL) { - size_t l; printf(" = \""); - for (l = 0; l < object->value.ss->length; l++) + for (size_t l = 0; l < object->value.ss->length; l++) { char c = object->value.ss->c_string[l]; @@ -1144,7 +1144,7 @@ YR_API void yr_object_print_data( break; case OBJECT_TYPE_ARRAY: - for (i = 0; i < yr_object_array_length(object); i++) + for (int i = 0; i < yr_object_array_length(object); i++) { YR_OBJECT* o = yr_object_array_get_item(object, 0, i); @@ -1162,7 +1162,7 @@ YR_API void yr_object_print_data( if (dict_items != NULL) { - for (i = 0; i < dict_items->used; i++) + for (int i = 0; i < dict_items->used; i++) { printf("\n%s\t%s", indent_spaces, dict_items->objects[i].key->c_string); diff --git a/libyara/proc.c b/libyara/proc.c index 2c3ccce4e3..089c25698f 100644 --- a/libyara/proc.c +++ b/libyara/proc.c @@ -54,6 +54,8 @@ YR_API int yr_process_open_iterator(int pid, YR_MEMORY_BLOCK_ITERATOR* iterator) iterator->context = context; iterator->first = yr_process_get_first_memory_block; iterator->next = yr_process_get_next_memory_block; + 
iterator->last_error = ERROR_SUCCESS; + // In a process scan file size is undefined, when the file_size function is // set to NULL the value returned by the filesize keyword is YR_UNDEFINED. iterator->file_size = NULL; diff --git a/libyara/proc/freebsd.c b/libyara/proc/freebsd.c index a8e2c4b94d..54d3a445f8 100644 --- a/libyara/proc/freebsd.c +++ b/libyara/proc/freebsd.c @@ -138,8 +138,6 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( proc_info->vm_entry.pve_path = buf; proc_info->vm_entry.pve_pathlen = sizeof(buf); - iterator->last_error = ERROR_SUCCESS; - uint64_t current_begin = context->current_block.base + context->current_block.size; @@ -148,6 +146,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( yr_get_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, &max_process_memory_chunk); + iterator->last_error = ERROR_SUCCESS; + if (proc_info->vm_entry.pve_end <= current_begin) { if (ptrace( @@ -180,7 +180,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( proc_info->vm_entry.pve_entry = 0; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/proc/linux.c b/libyara/proc/linux.c index 2db9e7c5a3..4de4529d6b 100644 --- a/libyara/proc/linux.c +++ b/libyara/proc/linux.c @@ -157,8 +157,9 @@ YR_API const uint8_t* yr_process_fetch_memory_block_data(YR_MEMORY_BLOCK* block) int fd = -2; // Assume mapping not connected with a file. 
- if (strlen(proc_info->map_path) > 0 && proc_info->map_dmaj != 0 && - proc_info->map_ino != 0) + // Only try mapping the file if it has a path and belongs to a device + if (strlen(proc_info->map_path) > 0 && + !(proc_info->map_dmaj == 0 && proc_info->map_dmin == 0)) { struct stat st; fd = open(proc_info->map_path, O_RDONLY); @@ -311,8 +312,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( char buffer[PATH_MAX]; char perm[5]; - uint64_t begin, end; + uint64_t begin, end; uint64_t current_begin = context->current_block.base + context->current_block.size; @@ -321,10 +322,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( yr_get_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, &max_process_memory_chunk); + iterator->last_error = ERROR_SUCCESS; + if (proc_info->next_block_end <= current_begin) { - int n, path_start; - char *p; + int path_start, n = 0; + char* p; while (fgets(buffer, sizeof(buffer), proc_info->maps) != NULL) { @@ -398,14 +401,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( assert(context->current_block.size > 0); - iterator->last_error = ERROR_SUCCESS; - YR_DEBUG_FPRINTF( 2, stderr, "- %s() {} = %p // .base=0x%" PRIx64 " .size=%" PRIu64 "\n", __FUNCTION__, - result, + context->current_block, context->current_block.base, context->current_block.size); @@ -433,6 +434,9 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( _exit: + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + YR_DEBUG_FPRINTF(2, stderr, "} = %p // %s()\n", result, __FUNCTION__); return result; diff --git a/libyara/proc/mach.c b/libyara/proc/mach.c index 52e0df2e34..8ed3f56658 100644 --- a/libyara/proc/mach.c +++ b/libyara/proc/mach.c @@ -175,7 +175,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( context->current_block.base = 0; context->current_block.size = 0; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = 
yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/proc/openbsd.c b/libyara/proc/openbsd.c index 1cba6fae60..8e2d0c311d 100644 --- a/libyara/proc/openbsd.c +++ b/libyara/proc/openbsd.c @@ -150,8 +150,6 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( int mib[] = {CTL_KERN, KERN_PROC_VMMAP, proc_info->pid}; size_t len = sizeof(struct kinfo_vmentry); - iterator->last_error = ERROR_SUCCESS; - uint64_t current_begin = context->current_block.base + context->current_block.size; @@ -160,6 +158,8 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_next_memory_block( yr_get_configuration_uint64( YR_CONFIG_MAX_PROCESS_MEMORY_CHUNK, &max_process_memory_chunk); + iterator->last_error = ERROR_SUCCESS; + if (proc_info->old_end <= current_begin) { if (sysctl(mib, 3, &proc_info->vm_entry, &len, NULL, 0) < 0) @@ -192,7 +192,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( proc_info->vm_entry.kve_start = 0; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/proc/windows.c b/libyara/proc/windows.c index 3668cff8a5..14780e175b 100644 --- a/libyara/proc/windows.c +++ b/libyara/proc/windows.c @@ -185,7 +185,12 @@ YR_API YR_MEMORY_BLOCK* yr_process_get_first_memory_block( context->current_block.base = (size_t) proc_info->si.lpMinimumApplicationAddress; - return yr_process_get_next_memory_block(iterator); + YR_MEMORY_BLOCK* result = yr_process_get_next_memory_block(iterator); + + if (result == NULL) + iterator->last_error = ERROR_COULD_NOT_READ_PROCESS_MEMORY; + + return result; } #endif diff --git a/libyara/re_lexer.c b/libyara/re_lexer.c index bf5520f351..7b96701432 100644 --- a/libyara/re_lexer.c +++ b/libyara/re_lexer.c @@ -639,8 +639,8 
@@ static const YY_CHAR yy_meta[23] = static const flex_int16_t yy_base[51] = { 0, - 0, 20, 3, 5, 50, 89, 89, 89, 10, 36, - 0, 44, 43, 47, 38, 89, 26, 33, 89, 89, + 0, 20, 3, 5, 49, 89, 89, 89, 10, 36, + 0, 43, 38, 47, 89, 89, 26, 33, 89, 89, 89, 89, 89, 89, 89, 89, 4, 5, 0, 33, 32, 31, 29, 26, 24, 23, 15, 89, 8, 89, 89, 89, 0, 89, 67, 72, 77, 82, 84, 4 @@ -661,7 +661,7 @@ static const flex_int16_t yy_nxt[112] = 28, 9, 10, 39, 8, 14, 15, 14, 15, 29, 11, 7, 8, 16, 17, 40, 41, 29, 29, 40, 29, 9, 10, 29, 8, 29, 29, 29, 18, 38, - 11, 18, 29, 19, 20, 21, 22, 29, 29, 44, + 11, 18, 29, 19, 20, 21, 22, 29, 44, 44, 44, 23, 24, 25, 26, 31, 32, 33, 44, 44, 44, 44, 44, 34, 35, 36, 37, 6, 6, 6, 6, 6, 12, 12, 12, 12, 12, 30, 44, 30, @@ -678,7 +678,7 @@ static const flex_int16_t yy_chk[112] = 28, 1, 1, 39, 1, 3, 3, 4, 4, 37, 1, 2, 2, 9, 9, 27, 28, 36, 35, 39, 34, 2, 2, 33, 2, 32, 31, 30, 18, 17, - 2, 10, 15, 10, 10, 10, 10, 13, 12, 5, + 2, 10, 13, 10, 10, 10, 10, 12, 5, 0, 0, 10, 10, 10, 10, 14, 14, 14, 0, 0, 0, 0, 0, 14, 14, 14, 14, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 47, 0, 47, @@ -1377,9 +1377,26 @@ YY_RULE_SETUP #line 311 "re_lexer.l" { - // A range inside a character class. + // A range inside a character class. The regexp is... + // + // ( \x{hex_digit}{2} Hex digit (i.e: \x01) ... + // | \. ...or any escaped character (i.e. \\, \-) ... + // | [^]\] ...or any character except ] and \ ... + // ) + // - ... followed by - + // [^]] ... 
followed by any character except ] + // + // Some examples: + // // [abc0-9] - // ^- matching here + // ^-^ matching range 0-9 + // + // [a-za-] + // ^-^- matching range a-z + // + // [\.-a] + // ^--^- matching range \.-a + // uint16_t c; uint8_t start = yytext[0]; @@ -1422,67 +1439,55 @@ YY_RULE_SETUP YY_BREAK case 20: YY_RULE_SETUP -#line 357 "re_lexer.l" +#line 374 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= word_chars[i]; } YY_BREAK case 21: YY_RULE_SETUP -#line 366 "re_lexer.l" +#line 381 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~word_chars[i]; } YY_BREAK case 22: YY_RULE_SETUP -#line 375 "re_lexer.l" +#line 388 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= space_chars[i]; } YY_BREAK case 23: YY_RULE_SETUP -#line 384 "re_lexer.l" +#line 395 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~space_chars[i]; } YY_BREAK case 24: YY_RULE_SETUP -#line 393 "re_lexer.l" +#line 402 "re_lexer.l" { - char c; - - for (c = '0'; c <= '9'; c++) + for (char c = '0'; c <= '9'; c++) LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } YY_BREAK case 25: YY_RULE_SETUP -#line 402 "re_lexer.l" +#line 409 "re_lexer.l" { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) { // digits 0-7 are in the sixth byte of the vector, let that byte alone if (i == 6) @@ -1499,7 +1504,7 @@ YY_RULE_SETUP YY_BREAK case 26: YY_RULE_SETUP -#line 422 "re_lexer.l" +#line 427 "re_lexer.l" { uint8_t c; @@ -1517,7 +1522,7 @@ YY_RULE_SETUP YY_BREAK case 27: YY_RULE_SETUP -#line 438 "re_lexer.l" +#line 443 "re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1535,7 +1540,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(char_class): -#line 455 "re_lexer.l" +#line 460 "re_lexer.l" { // End of regexp reached while 
scanning a character class. @@ -1546,7 +1551,7 @@ case YY_STATE_EOF(char_class): YY_BREAK case 28: YY_RULE_SETUP -#line 464 "re_lexer.l" +#line 469 "re_lexer.l" { if (yytext[0] >= 32 && yytext[0] < 127) @@ -1561,7 +1566,7 @@ YY_RULE_SETUP } YY_BREAK case YY_STATE_EOF(INITIAL): -#line 478 "re_lexer.l" +#line 483 "re_lexer.l" { yyterminate(); @@ -1569,10 +1574,10 @@ case YY_STATE_EOF(INITIAL): YY_BREAK case 29: YY_RULE_SETUP -#line 483 "re_lexer.l" +#line 488 "re_lexer.l" ECHO; YY_BREAK -#line 1575 "re_lexer.c" +#line 1580 "re_lexer.c" case YY_END_OF_BUFFER: { @@ -2721,7 +2726,7 @@ void yyfree (void * ptr , yyscan_t yyscanner) #define YYTABLES_NAME "yytables" -#line 483 "re_lexer.l" +#line 488 "re_lexer.l" int escaped_char_value( @@ -2871,7 +2876,13 @@ int yr_parse_re_string( FAIL_ON_ERROR(yr_re_ast_create(re_ast)); - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(re_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/re_lexer.l b/libyara/re_lexer.l index 9a19410c5b..6cdc30b645 100644 --- a/libyara/re_lexer.l +++ b/libyara/re_lexer.l @@ -308,11 +308,28 @@ hex_digit [0-9a-fA-F] -(\\x{hex_digit}{2}|\\.|[^\\])\-[^]] { - - // A range inside a character class. +(\\x{hex_digit}{2}|\\.|[^]\\])-[^]] { + + // A range inside a character class. The regexp is... + // + // ( \x{hex_digit}{2} Hex digit (i.e: \x01) ... + // | \. ...or any escaped character (i.e. \\, \-) ... + // | [^]\] ...or any character except ] and \ ... + // ) + // - ... followed by - + // [^]] ... 
followed by any character except ] + // + // Some examples: + // // [abc0-9] - // ^- matching here + // ^-^ matching range 0-9 + // + // [a-za-] + // ^-^- matching range a-z + // + // [\.-a] + // ^--^- matching range \.-a + // uint16_t c; uint8_t start = yytext[0]; @@ -356,54 +373,42 @@ hex_digit [0-9a-fA-F] \\w { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= word_chars[i]; } \\W { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~word_chars[i]; } \\s { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= space_chars[i]; } \\S { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) LEX_ENV->re_class.bitmap[i] |= ~space_chars[i]; } \\d { - char c; - - for (c = '0'; c <= '9'; c++) + for (char c = '0'; c <= '9'; c++) LEX_ENV->re_class.bitmap[c / 8] |= 1 << c % 8; } \\D { - int i; - - for (i = 0; i < 32; i++) + for (int i = 0; i < 32; i++) { // digits 0-7 are in the sixth byte of the vector, let that byte alone if (i == 6) @@ -629,7 +634,13 @@ int yr_parse_re_string( FAIL_ON_ERROR(yr_re_ast_create(re_ast)); - yylex_init(&yyscanner); + if (yylex_init(&yyscanner) != 0) + { + yr_re_ast_destroy(*re_ast); + *re_ast = NULL; + return ERROR_INSUFFICIENT_MEMORY; + } + yyset_extra(*re_ast, yyscanner); yy_scan_string(re_string, yyscanner); yyparse(yyscanner, &lex_env); diff --git a/libyara/rules.c b/libyara/rules.c index df7f15077c..9cd1a2ebae 100644 --- a/libyara/rules.c +++ b/libyara/rules.c @@ -333,6 +333,9 @@ int yr_rules_from_arena(YR_ARENA* arena, YR_RULES** rules) YR_SUMMARY* summary = (YR_SUMMARY*) yr_arena_get_ptr( arena, YR_SUMMARY_SECTION, 0); + if (summary == NULL) + return ERROR_CORRUPT_FILE; + // Now YR_RULES relies on this arena, let's increment the arena's // reference count so that if the original owner of the arena calls // yr_arena_destroy the arena is not destroyed. 
diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index ee573fbba6..86b688dae4 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -241,6 +241,7 @@ cc_test( data = [ "data/tiny", "data/0ca09bde7602769120fadc4f7a4147347a7a97271370583586c9e587fd396171", + "data/bad_dotnet_pe", ], linkstatic = True, deps = [ diff --git a/tests/data/bad_dotnet_pe b/tests/data/bad_dotnet_pe new file mode 100644 index 0000000000..b4e07c1c6b Binary files /dev/null and b/tests/data/bad_dotnet_pe differ diff --git a/tests/test-dotnet.c b/tests/test-dotnet.c index f4d361b8b7..dc6703ccde 100644 --- a/tests/test-dotnet.c +++ b/tests/test-dotnet.c @@ -84,6 +84,15 @@ int main(int argc, char** argv) "tests/data/" "0ca09bde7602769120fadc4f7a4147347a7a97271370583586c9e587fd396171"); + assert_false_rule( + "import \"dotnet\" \ + rule test { \ + condition: \ + dotnet.version == \"v4.0.30319\" \ + }", + "tests/data/" + "bad_dotnet_pe"); + yr_finalize(); YR_DEBUG_FPRINTF( diff --git a/tests/test-magic.c b/tests/test-magic.c new file mode 100644 index 0000000000..ff2f34c868 --- /dev/null +++ b/tests/test-magic.c @@ -0,0 +1,44 @@ +#include +#include +#include + +#include "blob.h" +#include "util.h" + +int main(int argc, char** argv) +{ + int result = 0; + + YR_DEBUG_INITIALIZE(); + YR_DEBUG_FPRINTF(1, stderr, "+ %s() { // in %s\n", __FUNCTION__, argv[0]); + + init_top_srcdir(); + + yr_initialize(); + + assert_true_rule_blob( + "import \"magic\" rule test { condition: \ + magic.type() contains \"ELF\" }", + ELF32_FILE); + + assert_true_rule_blob( + "import \"magic\" rule test { condition: \ + magic.type() contains \"MS-DOS executable\" and \ + magic.mime_type() == \"application/x-dosexec\" }", + PE32_FILE); + + // Test case for https://github.com/VirusTotal/yara/issues/1663 + assert_true_rule_blob( + "import \"magic\" rule test { condition: \ + magic.type() contains \"Mach-O\" and \ + (magic.mime_type() == \"application/x-mach-binary\" or magic.mime_type() == \"application/octet-stream\") and \ + 
magic.type() contains \"Mach-O\"}", + MACHO_X86_FILE); + + yr_finalize(); + + YR_DEBUG_FPRINTF( + 1, stderr, "} = %d // %s() in %s\n", result, __FUNCTION__, argv[0]); + + return result; +} diff --git a/tests/test-rules.c b/tests/test-rules.c index 4846acfc09..1fd363b710 100644 --- a/tests/test-rules.c +++ b/tests/test-rules.c @@ -631,6 +631,28 @@ static void test_strings() }", "foobarbaz" TEXT_1024_BYTES); + // https://github.com/VirusTotal/yara/issues/1695 + assert_false_rule( + "rule test {\n\ + strings:\n\ + $a = \"AXS\"\n\ + $b = \"ERS\"\n\ + condition:\n\ + none of them in (0..10)\n\ + }", + "AXSERS" TEXT_1024_BYTES); + + // https://github.com/VirusTotal/yara/issues/1757 + assert_false_rule( + "rule test {\n\ + strings:\n\ + $a = \"foo\"\n\ + $b = \"foo\"\n\ + condition:\n\ + none of them in (0..1)\n\ + }", + "foo"); + // https://github.com/VirusTotal/yara/issues/1660 assert_false_rule( "rule test {\n\ @@ -1724,6 +1746,28 @@ static void test_of() }", "mississippi"); + // If one of the bounds can not be determined statically it isn't an error. + assert_true_rule( + "rule test { \ + strings: \ + $a = \"AXSERS\" \ + condition: \ + true or any of them in (0..filesize-100) \ + }", + TEXT_1024_BYTES); + + // Make sure that an undefined range boundary returns an undefined value, + // which translates to false. 
+ assert_false_rule( + "import \"tests\" \ + rule test { \ + strings: \ + $a = \"missi\" \ + condition: \ + any of them in (0..tests.undefined.i) \ + }", + "mississippi"); + YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); } @@ -1952,6 +1996,16 @@ void test_for() }", NULL); + // Test case for https://github.com/VirusTotal/yara/issues/1729 + assert_true_rule( + "rule test { \ + strings: \ + $a = \"abcde\" \ + condition: \ + for any n in (1..10) : ( n of ($a*) ) \ + }", + "abcde"); + YR_DEBUG_FPRINTF(1, stderr, "} // %s()\n", __FUNCTION__); } @@ -2212,6 +2266,11 @@ void test_re() assert_true_regexp("a[\\-b]", "ab", "ab"); assert_true_regexp("a]", "a]", "a]"); assert_true_regexp("a[]]b", "a]b", "a]b"); + assert_true_regexp("[a-z]-b", "c-b-c", "c-b"); // Issue #1690 + assert_true_regexp("a[]-]b", "a]b", "a]b"); + assert_true_regexp("a[]-]b", "a-b", "a-b"); + assert_true_regexp("[\\.-z]*", "...abc", "...abc"); + assert_true_regexp("[\\.-]*", "...abc", "..."); assert_true_regexp("a[\\]]b", "a]b", "a]b"); assert_true_regexp("a[^bc]d", "aed", "aed"); assert_false_regexp("a[^bc]d", "abd");