diff --git a/ddtrace/appsec/_iast/__init__.py b/ddtrace/appsec/_iast/__init__.py index eb3bec683d4..9d33ac7c77f 100644 --- a/ddtrace/appsec/_iast/__init__.py +++ b/ddtrace/appsec/_iast/__init__.py @@ -51,33 +51,37 @@ def wrapped_function(wrapped, instance, args, kwargs): def _disable_iast_after_fork(): """ - Conditionally disable IAST in forked child processes to prevent segmentation faults. + Handle IAST state after fork to prevent segmentation faults. - This fork handler differentiates between two types of forks: + This fork handler works in conjunction with the C++ pthread_atfork handler + (registered in native.cpp) to provide complete fork-safety: - 1. **Early forks (web framework workers)**: Gunicorn, uvicorn, Django, Flask workers - fork BEFORE IAST initializes any state. These are safe - IAST remains enabled. + **C++ pthread_atfork handler (automatic, runs first):** + - Clears all taint maps (removes stale PyObject pointers from parent) + - Resets taint_engine_context (recreates context array with fresh state) + - Resets initializer (recreates memory pools) + - Happens automatically for ALL forks before this Python handler runs - 2. **Late forks (multiprocessing)**: multiprocessing.Process forks AFTER IAST has - initialized state. These inherit corrupted native extension state and must have - IAST disabled to prevent segmentation faults. + **Python fork handler (this function, runs second):** + - Detects fork type (early vs late) + - Manages Python-level IAST_CONTEXT + - Conditionally disables IAST for late forks (multiprocessing) - Detection logic: - - If IAST has active request contexts when fork occurs → Late fork → Disable IAST - - If IAST has no active state → Early fork (worker) → Keep IAST enabled + Fork types: + 1. **Early forks (web framework workers)**: Gunicorn, uvicorn, etc. fork BEFORE + IAST has active request contexts. Native state was reset by pthread_atfork. + IAST remains enabled and works correctly in workers. - This is critical for multiprocessing compatibility while maintaining IAST coverage - in web framework workers. The native extension state (taint maps, context slots, - object pools, shared_ptr references) cannot be safely used across fork boundaries - when it exists, but is safe to initialize fresh in clean workers. + 2. **Late forks (multiprocessing)**: fork AFTER IAST has active contexts. + Native state was reset by pthread_atfork, but we disable IAST in these + processes for multiprocessing compatibility and to avoid confusion. - For late forks, the child process: - - Clears all C++ taint maps and context slots - - Resets the Python-level IAST_CONTEXT - - Disables IAST by setting asm_config._iast_enabled = False + Detection logic: + - If is_iast_request_enabled() → Late fork → Disable IAST + - If not is_iast_request_enabled() → Early fork → Keep IAST enabled - This prevents segmentation faults in multiprocessing while allowing IAST to work - in web framework workers. + This prevents segmentation faults in ALL fork scenarios while maintaining + IAST coverage in web framework workers. """ if not asm_config._iast_enabled: return @@ -86,33 +90,45 @@ def _disable_iast_after_fork(): # Import locally to avoid issues if the module hasn't been loaded yet from ddtrace.appsec._iast._iast_request_context_base import IAST_CONTEXT from ddtrace.appsec._iast._iast_request_context_base import is_iast_request_enabled - from ddtrace.appsec._iast._taint_tracking._context import clear_all_request_context_slots + + # Note: The C++ pthread_atfork handler (in native.cpp) automatically resets + # native state in actual fork scenarios. It clears the inherited state and + # creates fresh instances without touching the invalid PyObject pointers. + # We don't need to (and shouldn't) call clear_all_request_context_slots() + # here because the C++ handler has already done the necessary cleanup. # In pytest mode, always disable IAST in child processes to avoid segfaults # when tests create multiprocesses (e.g., for testing fork behavior) if _iast_in_pytest_mode: log.debug("IAST fork handler: Pytest mode detected, disabling IAST in child process") - clear_all_request_context_slots() IAST_CONTEXT.set(None) asm_config._iast_enabled = False return if not is_iast_request_enabled(): # No active context - this is an early fork (web framework worker) - # IAST can be safely initialized fresh in this child process - log.debug("IAST fork handler: No active context, keeping IAST enabled (web worker fork)") + # The C++ pthread_atfork handler has already reset native state with fresh instances. + # IAST can continue working correctly in this child process. + log.debug( + "IAST fork handler: No active context (early fork/web worker). " + "Native state auto-reset by pthread_atfork. IAST remains enabled." + ) + # Clear Python-side context just in case + IAST_CONTEXT.set(None) return # Active context exists - this is a late fork (multiprocessing) - # Native state is corrupted, must disable IAST - log.debug("IAST fork handler: Active context detected, disabling IAST (multiprocessing fork)") + # The C++ pthread_atfork handler has already reset native state. + # We disable IAST in these processes for consistency. + log.debug( + "IAST fork handler: Active context (late fork/multiprocessing). " + "Native state auto-reset by pthread_atfork. Disabling IAST in child." + ) - # Clear C++ side: all taint maps and context slots - clear_all_request_context_slots() # Clear Python side: reset the context ID IAST_CONTEXT.set(None) - # Disable IAST to prevent segmentation faults + # Disable IAST for multiprocessing compatibility asm_config._iast_enabled = False except Exception as e: @@ -182,6 +198,7 @@ def enable_iast_propagation(): if asm_config._iast_propagation_enabled: from ddtrace.appsec._iast._ast.ast_patching import _should_iast_patch from ddtrace.appsec._iast._loader import _exec_iast_patched_module + from ddtrace.appsec._iast._taint_tracking import initialize_native_state global _iast_propagation_enabled if _iast_propagation_enabled: @@ -190,6 +207,7 @@ def enable_iast_propagation(): log.debug("iast::instrumentation::starting IAST") ModuleWatchdog.register_pre_exec_module_hook(_should_iast_patch, _exec_iast_patched_module) _iast_propagation_enabled = True + initialize_native_state() _register_fork_handler() diff --git a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt index 0144abb37ba..118cbebef98 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt +++ b/ddtrace/appsec/_iast/_taint_tracking/CMakeLists.txt @@ -63,6 +63,7 @@ file( GLOB SOURCE_FILES "*.cpp" + "api/*.cpp" "context/*.cpp" "aspects/*.cpp" "initializer/*.cpp" @@ -73,6 +74,7 @@ file( GLOB HEADER_FILES "*.h" + "api/*.h" "context/*.h" "aspects/*.h" "initializer/*.h" diff --git a/ddtrace/appsec/_iast/_taint_tracking/__init__.py b/ddtrace/appsec/_iast/_taint_tracking/__init__.py index ace537feed5..63415df5cd6 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/__init__.py +++ b/ddtrace/appsec/_iast/_taint_tracking/__init__.py @@ -1,4 +1,6 @@ +from ddtrace.appsec._iast._taint_tracking._native import initialize_native_state # noqa: F401 from ddtrace.appsec._iast._taint_tracking._native import ops # noqa: F401 +from ddtrace.appsec._iast._taint_tracking._native import reset_native_state # noqa: F401 from ddtrace.appsec._iast._taint_tracking._native.aspect_format import _format_aspect # noqa: F401 from ddtrace.appsec._iast._taint_tracking._native.aspect_helpers import _convert_escaped_text_to_tainted_text @@ -65,6 +67,8 @@ "copy_ranges_from_strings", "get_range_by_hash", "get_ranges", + "reset_native_state", + "initialize_native_state", "is_tainted", "new_pyobject_id", "origin_to_str", diff --git a/ddtrace/appsec/_iast/_taint_tracking/api/safe_context.cpp b/ddtrace/appsec/_iast/_taint_tracking/api/safe_context.cpp new file mode 100644 index 00000000000..3eb4ddc8b95 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/api/safe_context.cpp @@ -0,0 +1,35 @@ +#include "api/safe_context.h" + +// ============================================================================ +// Safe wrapper functions for global pointers +// ============================================================================ +// These functions centralize null checks for taint_engine_context and +// initializer to prevent segmentation faults when called before +// initialize_native_state(). All aspect functions should use these wrappers +// instead of accessing the globals directly. +TaintedObjectMapTypePtr +safe_get_tainted_object_map(PyObject* tainted_object) +{ + if (!taint_engine_context) { + return nullptr; + } + return taint_engine_context->get_tainted_object_map(tainted_object); +} + +TaintedObjectMapTypePtr +safe_get_tainted_object_map_from_list_of_pyobjects(const std::vector& objects) +{ + if (!taint_engine_context) { + return nullptr; + } + return taint_engine_context->get_tainted_object_map_from_list_of_pyobjects(objects); +} + +TaintedObjectMapTypePtr +safe_get_tainted_object_map_by_ctx_id(size_t ctx_id) +{ + if (!taint_engine_context) { + return nullptr; + } + return taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/api/safe_context.h b/ddtrace/appsec/_iast/_taint_tracking/api/safe_context.h new file mode 100644 index 00000000000..0060f2aed53 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/api/safe_context.h @@ -0,0 +1,30 @@ +#pragma once +#include "context/taint_engine_context.h" + +// Safe wrapper functions that check for null before accessing global pointers +// These functions centralize null checks to prevent segmentation faults when +// called before initialize_native_state() + +/** + * @brief Safely get tainted object map for a PyObject. + * @param tainted_object The Python object to look up + * @return TaintedObjectMapTypePtr or nullptr if not initialized or not found + */ +TaintedObjectMapTypePtr +safe_get_tainted_object_map(PyObject* tainted_object); + +/** + * @brief Safely get tainted object map by context ID. + * @param ctx_id The context ID to look up + * @return TaintedObjectMapTypePtr or nullptr if not initialized or not found + */ +TaintedObjectMapTypePtr +safe_get_tainted_object_map_by_ctx_id(size_t ctx_id); + +/** + * @brief Safely get tainted object map from a list of PyObjects. + * @param objects Vector of PyObject pointers to search + * @return TaintedObjectMapTypePtr or nullptr if not initialized or not found + */ +TaintedObjectMapTypePtr +safe_get_tainted_object_map_from_list_of_pyobjects(const std::vector& objects); diff --git a/ddtrace/appsec/_iast/_taint_tracking/api/safe_initializer.cpp b/ddtrace/appsec/_iast/_taint_tracking/api/safe_initializer.cpp new file mode 100644 index 00000000000..f337d98e18a --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/api/safe_initializer.cpp @@ -0,0 +1,46 @@ +#include "api/safe_initializer.h" + +TaintRangePtr +safe_allocate_taint_range(RANGE_START start, RANGE_LENGTH length, const Source& source, const SecureMarks secure_marks) +{ + if (!initializer) { + return nullptr; + } + return initializer->allocate_taint_range(start, length, source, secure_marks); +} + +TaintedObjectPtr +safe_allocate_tainted_object_copy(const TaintedObjectPtr& from) +{ + if (!initializer) { + return nullptr; + } + return initializer->allocate_tainted_object_copy(from); +} + +TaintedObjectPtr +safe_allocate_tainted_object() +{ + if (!initializer) { + return nullptr; + } + return initializer->allocate_tainted_object(); +} + +TaintedObjectPtr +safe_allocate_ranges_into_taint_object(TaintRangeRefs ranges) +{ + if (!initializer) { + return nullptr; + } + return initializer->allocate_ranges_into_taint_object(ranges); +} + +TaintedObjectPtr +safe_allocate_ranges_into_taint_object_copy(const TaintRangeRefs& ranges) +{ + if (!initializer) { + return nullptr; + } + return initializer->allocate_ranges_into_taint_object_copy(ranges); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/api/safe_initializer.h b/ddtrace/appsec/_iast/_taint_tracking/api/safe_initializer.h new file mode 100644 index 00000000000..b6161b363fd --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/api/safe_initializer.h @@ -0,0 +1,49 @@ +#pragma once + +#include "initializer/initializer.h" +#include + +/** + * @brief Safely allocate a taint range. + * @param start The start position of the taint range + * @param length The length of the taint range + * @param source The source of the taint + * @param secure_marks Optional secure marks + * @return TaintRangePtr or nullptr if not initialized + */ +TaintRangePtr +safe_allocate_taint_range(RANGE_START start, + RANGE_LENGTH length, + const Source& source, + const SecureMarks secure_marks = 0); + +/** + * @brief Safely allocate a copy of a tainted object. + * @param from The tainted object to copy + * @return TaintedObjectPtr or nullptr if not initialized + */ +TaintedObjectPtr +safe_allocate_tainted_object_copy(const TaintedObjectPtr& from); + +/** + * @brief Safely allocate a tainted object. + * @return TaintedObjectPtr or nullptr if not initialized + */ +TaintedObjectPtr +safe_allocate_tainted_object(); + +/** + * @brief Safely allocate ranges into a tainted object. + * @param ranges The ranges to allocate + * @return TaintedObjectPtr or nullptr if not initialized + */ +TaintedObjectPtr +safe_allocate_ranges_into_taint_object(TaintRangeRefs ranges); + +/** + * @brief Safely allocate a copy of ranges into a tainted object. + * @param ranges The ranges to copy and allocate + * @return TaintedObjectPtr or nullptr if not initialized + */ +TaintedObjectPtr +safe_allocate_ranges_into_taint_object_copy(const TaintRangeRefs& ranges); diff --git a/ddtrace/appsec/_iast/_taint_tracking/api/utils.h b/ddtrace/appsec/_iast/_taint_tracking/api/utils.h new file mode 100644 index 00000000000..d64ed5a2660 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/api/utils.h @@ -0,0 +1,4 @@ +#define CHECK_IAST_INITIALIZED_OR_RETURN(fallback_result) \ + if (!taint_engine_context || !initializer) { \ + return fallback_result; \ + } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.cpp index 9ca075953a3..48d9ee03587 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.cpp @@ -29,7 +29,7 @@ api_extend_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) return nullptr; } - auto ctx_map = taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ candidate_text, to_add }); + auto ctx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text, to_add }); if (not ctx_map or ctx_map->empty()) { auto method_name = PyUnicode_FromString("extend"); PyObject_CallMethodObjArgs(candidate_text, method_name, to_add, nullptr); @@ -40,7 +40,7 @@ api_extend_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) Py_DecRef(method_name); } else { const auto& to_candidate = get_tainted_object(candidate_text, ctx_map); - auto to_result = initializer->allocate_tainted_object_copy(to_candidate); + auto to_result = safe_allocate_tainted_object_copy(to_candidate); const auto& to_toadd = get_tainted_object(to_add, ctx_map); // Ensure no returns are done before this method call diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.h index 3d0982e77f9..c0a4ccc1769 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_extend.h @@ -1,7 +1,7 @@ #pragma once -#include "initializer/initializer.h" -#include +#include "api/safe_context.h" +#include "api/safe_initializer.h" PyObject* api_extend_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs); diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.cpp index e16c3f45f9f..c3bcb675cdb 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.cpp @@ -18,16 +18,17 @@ api_format_aspect(StrType& candidate_text, const py::args& args, const py::kwargs& kwargs) { - auto tx_map = taint_engine_context->get_tainted_object_map_from_list_of_pyobjects( - { candidate_text.ptr(), parameter_list.ptr() }); + auto return_candidate_result = [&]() -> StrType { return py::getattr(candidate_text, "format")(*args, **kwargs); }; + + auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text.ptr(), parameter_list.ptr() }); if (not tx_map or tx_map->empty()) { - return py::getattr(candidate_text, "format")(*args, **kwargs); + return return_candidate_result(); } auto [ranges_orig, candidate_text_ranges] = are_all_text_all_ranges(candidate_text.ptr(), parameter_list, tx_map); if (ranges_orig.empty() and candidate_text_ranges.empty()) { - return py::getattr(candidate_text, "format")(*args, **kwargs); + return return_candidate_result(); } auto new_template = diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.h index e833d8e7562..8e82ba936e7 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_format.h @@ -1,6 +1,7 @@ #pragma once +#include "api/safe_context.h" +#include "api/safe_initializer.h" #include "helpers.h" -#include "initializer/initializer.h" template StrType diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.cpp index 3c297ca411d..f9c0850b904 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.cpp @@ -1,6 +1,4 @@ #include "aspect_index.h" -#include "helpers.h" -#include "utils/string_utils.h" /** * @brief Index aspect @@ -26,7 +24,7 @@ index_aspect(PyObject* result_o, const auto idx_long = PyLong_AsLong(idx); if (current_range->start <= idx_long and idx_long < (current_range->start + current_range->length)) { ranges_to_set.emplace_back( - initializer->allocate_taint_range(0l, 1l, current_range->source, current_range->secure_marks)); + safe_allocate_taint_range(0l, 1l, current_range->source, current_range->secure_marks)); break; } } @@ -36,7 +34,7 @@ index_aspect(PyObject* result_o, const size_t& len_result_o{ get_pyobject_size(result_o) }; const auto& current_range = ranges.at(0); ranges_to_set.emplace_back( - initializer->allocate_taint_range(0l, len_result_o, current_range->source, current_range->secure_marks)); + safe_allocate_taint_range(0l, len_result_o, current_range->source, current_range->secure_marks)); } catch (const std::out_of_range& ex) { if (nullptr == result_o) { throw py::index_error(); @@ -80,8 +78,10 @@ api_index_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) return nullptr; } + CHECK_IAST_INITIALIZED_OR_RETURN(result_o); + TRY_CATCH_ASPECT("index_aspect", return result_o, , { - const auto tx_map = taint_engine_context->get_tainted_object_map(candidate_text); + const auto tx_map = safe_get_tainted_object_map(candidate_text); if (tx_map == nullptr or tx_map->empty()) { return result_o; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.h index c1a14bae54f..6ca7fc8fb78 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_index.h @@ -1,5 +1,10 @@ #pragma once +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" +#include "helpers.h" #include "tainted_ops/tainted_ops.h" +#include "utils/string_utils.h" PyObject* index_aspect(PyObject* result_o, diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.cpp index ee34695d2f8..03163b78f8c 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.cpp @@ -1,7 +1,5 @@ #include "aspect_join.h" -#include "helpers.h" - PyObject* aspect_join_str(PyObject* sep, PyObject* result, @@ -26,10 +24,10 @@ aspect_join_str(PyObject* sep, if (len_sep == 0 and to_iterable_str) { // Empty separator: the result is identical to iterable_str - result_to = initializer->allocate_tainted_object_copy(to_iterable_str); + result_to = safe_allocate_tainted_object_copy(to_iterable_str); } else if (len_sep > 0 and to_joiner and to_iterable_str == nullptr) { // Iterable str is not tainted, only add n-times the joiner taints - result_to = initializer->allocate_tainted_object(); + result_to = safe_allocate_tainted_object(); for (size_t i = 0; i < len_iterable - 1; i++) { current_pos += element_len; result_to->add_ranges_shifted(to_joiner, current_pos); @@ -37,7 +35,7 @@ aspect_join_str(PyObject* sep, } } else { // General case, iterable and joiner may be tainted - result_to = initializer->allocate_tainted_object(); + result_to = safe_allocate_tainted_object(); for (size_t i = 0; i < len_iterable; i++) { if (to_iterable_str) { result_to->add_ranges_shifted(to_iterable_str, current_pos, element_len, i); @@ -103,7 +101,7 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, const } else { if (result_to == nullptr) { // If first_tainted_to is null, it's ranges won't be copied - result_to = initializer->allocate_tainted_object_copy(first_tainted_to); + result_to = safe_allocate_tainted_object_copy(first_tainted_to); first_tainted_to = nullptr; } result_to->add_ranges_shifted(to_element, current_pos); @@ -115,7 +113,7 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, const if (len_sep > 0 and i < len_iterable - 1 and to_joiner) { if (result_to == nullptr) { // If first_tainted_to is null, it's ranges won't be copied - result_to = initializer->allocate_tainted_object_copy(first_tainted_to); + result_to = safe_allocate_tainted_object_copy(first_tainted_to); first_tainted_to = nullptr; } result_to->add_ranges_shifted(to_joiner, current_pos); @@ -125,7 +123,7 @@ aspect_join(PyObject* sep, PyObject* result, PyObject* iterable_elements, const if (result_to == nullptr) { if (first_tainted_to) { - result_to = initializer->allocate_tainted_object_copy(first_tainted_to); + result_to = safe_allocate_tainted_object_copy(first_tainted_to); } else { // No taints at all return result; @@ -186,8 +184,11 @@ api_join_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) } return nullptr; } + + CHECK_IAST_INITIALIZED_OR_RETURN(result); + TRY_CATCH_ASPECT("join_aspect", return result, , { - auto ctx_map = taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ sep, arg0 }); + auto ctx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ sep, arg0 }); if (not ctx_map or ctx_map->empty() or get_pyobject_size(result) == 0) { // Empty result cannot have taint ranges if (decref_arg0) { diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.h index 0a8d4617b92..5c9e7cfc255 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_join.h @@ -1,7 +1,8 @@ #pragma once -#include "initializer/initializer.h" - -namespace py = pybind11; +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" +#include "helpers.h" PyObject* api_join_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs); diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.cpp index e84169f9a8d..3b8be5cc329 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.cpp @@ -1,5 +1,4 @@ #include "aspect_modulo.h" -#include "helpers.h" static PyObject* do_modulo(PyObject* text, PyObject* insert_tuple_or_obj) @@ -59,8 +58,9 @@ api_modulo_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) return candidate_result; }; - const auto tx_map = - taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ candidate_text, candidate_tuple }); + CHECK_IAST_INITIALIZED_OR_RETURN(return_candidate_result()); + + const auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text, candidate_tuple }); if (!tx_map || tx_map->empty()) { return return_candidate_result(); } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.h index b82507c327f..f88f2f47cc6 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_modulo.h @@ -1,7 +1,8 @@ #pragma once -#include "initializer/initializer.h" - -namespace py = pybind11; +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" +#include "helpers.h" PyObject* api_modulo_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs); diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp index 7f2442555ea..f0d4490dc0f 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.cpp @@ -1,5 +1,4 @@ #include "aspect_operator_add.h" -#include "helpers.h" /** * This function updates result_o object with taint information of candidate_text and/or text_to_add @@ -48,14 +47,14 @@ add_aspect(PyObject* result_o, } if (!to_candidate_text) { - const auto tainted = initializer->allocate_tainted_object(); + const auto tainted = safe_allocate_tainted_object(); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); set_tainted_object(result_o, tainted, tx_taint_map); } // At this point we have both to_candidate_text and to_text_to_add to we add the // ranges from both to result_o - const auto tainted = initializer->allocate_tainted_object_copy(to_candidate_text); + const auto tainted = safe_allocate_tainted_object_copy(to_candidate_text); tainted->add_ranges_shifted(to_text_to_add, static_cast(len_candidate_text)); set_tainted_object(result_o, tainted, tx_taint_map); @@ -94,9 +93,10 @@ api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) return nullptr; } + CHECK_IAST_INITIALIZED_OR_RETURN(result_o); + TRY_CATCH_ASPECT("add_aspect", return result_o, , { - auto tx_map = - taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ candidate_text, text_to_add }); + auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text, text_to_add }); if (not tx_map || tx_map->empty()) { return result_o; } @@ -138,9 +138,10 @@ api_add_inplace_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) return nullptr; } + CHECK_IAST_INITIALIZED_OR_RETURN(result_o); + TRY_CATCH_ASPECT("add_inplace_aspect", return result_o, , { - auto tx_map = - taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ candidate_text, text_to_add }); + auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text, text_to_add }); if (not tx_map or tx_map->empty()) { return result_o; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.h index 9a05f720805..edfef0c81ae 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_operator_add.h @@ -1,6 +1,8 @@ #pragma once -#include "context/taint_engine_context.h" -#include "initializer/initializer.h" +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" +#include "helpers.h" PyObject* api_add_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs); diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.cpp index b49e7375540..244b4d4939b 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.cpp @@ -105,7 +105,7 @@ build_index_range_map(PyObject* text, TaintRangeRefs& ranges, PyObject* start, P PyObject* slice_aspect(PyObject* result_o, PyObject* candidate_text, PyObject* start, PyObject* stop, PyObject* step) { - const auto ctx_map = taint_engine_context->get_tainted_object_map(candidate_text); + const auto ctx_map = safe_get_tainted_object_map(candidate_text); if (not ctx_map or ctx_map->empty()) { return result_o; @@ -158,4 +158,4 @@ api_slice_aspect(PyObject* self, PyObject* const* args, Py_ssize_t nargs) Py_XDECREF(slice); return res; }); -} +} \ No newline at end of file diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.h index c783f76a102..cdf73ee01ce 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_slice.h @@ -1,4 +1,8 @@ #pragma once +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" +#include "helpers.h" #include "tainted_ops/tainted_ops.h" PyObject* diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.cpp index 5a66f63a964..cabdc895c59 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.cpp @@ -1,6 +1,4 @@ #include "aspect_split.h" -#include "initializer/initializer.h" -#include "tainted_ops/tainted_ops.h" static std::optional handle_potential_re_split(const py::tuple& args, @@ -57,8 +55,9 @@ split_text_common(const py::object& orig_function, const py::tuple sliced_args = len(args) > 1 ? args[py::slice(1, len(args), 1)] : py::tuple(); // (,) auto result_o = text.attr(split_func.c_str())(*sliced_args, **kwargs); // returns['', ''] WHY? - const auto tx_map = - taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ text.ptr(), sliced_args.ptr() }); + CHECK_IAST_INITIALIZED_OR_RETURN(result_o); + + const auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ text.ptr(), sliced_args.ptr() }); if (!tx_map || tx_map->empty()) { return result_o; } @@ -99,8 +98,7 @@ api_splitlines_text(const py::object& orig_function, const py::tuple sliced_args = len(args) > 1 ? args[py::slice(1, len(args), 1)] : py::tuple(); py::object result_o = text.attr("splitlines")(*sliced_args, **kwargs); - const auto tx_map = - taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ text.ptr(), sliced_args.ptr() }); + const auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ text.ptr(), sliced_args.ptr() }); if (!tx_map || tx_map->empty()) { return result_o; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.h index 7e889d50835..803656153c9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_split.h @@ -1,6 +1,9 @@ #pragma once - +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" #include "helpers.h" +#include "tainted_ops/tainted_ops.h" py::object api_splitlines_text(const py::object& orig_function, diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.cpp index 5874efee912..4c4175e3db3 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.cpp @@ -1,5 +1,4 @@ #include "aspect_str.h" -#include "helpers.h" static void set_lengthupdated_ranges(PyObject* result, @@ -177,9 +176,10 @@ api_str_aspect(PyObject* self, PyObject* const* args, const Py_ssize_t nargs, Py Py_RETURN_NONE; } } + CHECK_IAST_INITIALIZED_OR_RETURN(result_o); TRY_CATCH_ASPECT("str_aspect", return result_o, , { - const auto tx_map = taint_engine_context->get_tainted_object_map(text); + const auto tx_map = safe_get_tainted_object_map(text); if (!tx_map || tx_map->empty()) { return result_o; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.h index e61fc7b5275..510cbb9c413 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspect_str.h @@ -1,4 +1,7 @@ #pragma once +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" #include "helpers.h" PyObject* diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.cpp index bf0107ad10e..5c35eb43ec9 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.cpp @@ -1,10 +1,6 @@ #include "aspects_os_path.h" #include -#include "helpers.h" -#include "initializer/initializer.h" -#include "utils/string_utils.h" - static bool starts_with_separator(const py::handle& arg, const std::string& separator) { @@ -19,8 +15,9 @@ api_ospathjoin_aspect(StrType& first_part, const py::args& args) auto join = safe_import("os.path", "join"); auto result_o = join(first_part, *args); - const auto tx_map = - taint_engine_context->get_tainted_object_map_from_list_of_pyobjects({ first_part.ptr(), args.ptr() }); + CHECK_IAST_INITIALIZED_OR_RETURN(result_o); + + const auto tx_map = safe_get_tainted_object_map_from_list_of_pyobjects({ first_part.ptr(), args.ptr() }); if (not tx_map or tx_map->empty()) { return result_o; } @@ -107,7 +104,7 @@ api_ospathbasename_aspect(const StrType& path) auto result_o = basename(path); TRY_CATCH_ASPECT("ospathbasename_aspect", return result_o, , { - const auto tx_map = taint_engine_context->get_tainted_object_map(path.ptr()); + const auto tx_map = safe_get_tainted_object_map(path.ptr()); if (not tx_map or tx_map->empty() or py::len(result_o) == 0) { return result_o; } @@ -139,7 +136,7 @@ api_ospathdirname_aspect(const StrType& path) auto result_o = dirname(path); TRY_CATCH_ASPECT("ospathdirname_aspect", return result_o, , { - const auto tx_map = taint_engine_context->get_tainted_object_map(path.ptr()); + const auto tx_map = safe_get_tainted_object_map(path.ptr()); if (not tx_map or tx_map->empty() or py::len(result_o) == 0) { return result_o; } @@ -171,7 +168,7 @@ forward_to_set_ranges_on_splitted(const char* function_name, const StrType& path auto result_o = function(path); TRY_CATCH_ASPECT("forward_to_set_ranges_on_splitted", return result_o, , { - const auto tx_map = taint_engine_context->get_tainted_object_map(path.ptr()); + const auto tx_map = safe_get_tainted_object_map(path.ptr()); if (not tx_map or tx_map->empty() or py::len(result_o) == 0) { return result_o; } @@ -222,7 +219,7 @@ api_ospathnormcase_aspect(const StrType& path) auto result_o = normcase(path); TRY_CATCH_ASPECT("ospathnormcase_aspect", return result_o, , { - const auto tx_map = taint_engine_context->get_tainted_object_map(path.ptr()); + const auto tx_map = safe_get_tainted_object_map(path.ptr()); if (not tx_map or tx_map->empty()) { return result_o; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.h index c4e6fcc56c8..cb5b6f3dc24 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/aspects_os_path.h @@ -1,5 +1,9 @@ #pragma once -#include "initializer/initializer.h" +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" +#include "helpers.h" +#include "utils/string_utils.h" namespace py = pybind11; diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.cpp index 127cff48cc3..66448415806 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.cpp @@ -1,5 +1,4 @@ #include "helpers.h" -#include "utils/python_error_guard.h" #include #include @@ -23,7 +22,7 @@ api_common_replace(const py::str& string_method, { const StrType res = py::getattr(candidate_text, string_method)(*args, **kwargs); - const auto tx_map = taint_engine_context->get_tainted_object_map(candidate_text.ptr()); + const auto tx_map = safe_get_tainted_object_map(candidate_text.ptr()); if (not tx_map or tx_map->empty()) { return res; @@ -102,7 +101,9 @@ api_as_formatted_evidence(const StrType& text, const optional& tag_mapping_mode, const optional& new_ranges) { - const auto tx_map = taint_engine_context->get_tainted_object_map(text.ptr()); + CHECK_IAST_INITIALIZED_OR_RETURN(text); + + const auto tx_map = safe_get_tainted_object_map(text.ptr()); if (!tx_map or tx_map->empty()) { return text; } @@ -124,8 +125,9 @@ api_as_formatted_evidence(const StrType& text, py::bytearray api_convert_escaped_text_to_taint_text(const py::bytearray& taint_escaped_text, const TaintRangeRefs& ranges_orig) { + CHECK_IAST_INITIALIZED_OR_RETURN(taint_escaped_text); - const auto tx_map = taint_engine_context->get_tainted_object_map(taint_escaped_text.ptr()); + const auto tx_map = safe_get_tainted_object_map(taint_escaped_text.ptr()); if (!tx_map) { return taint_escaped_text; } @@ -143,6 +145,8 @@ template StrType api_convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const TaintRangeRefs& ranges_orig) { + CHECK_IAST_INITIALIZED_OR_RETURN(taint_escaped_text); + const auto tx_map = taint_engine_context->get_tainted_object_map_from_ranges(ranges_orig); if (!tx_map) { return taint_escaped_text; @@ -159,6 +163,8 @@ api_convert_escaped_text_to_taint_text(PyObject* taint_escaped_text, const TaintRangeRefs& ranges_orig, const PyTextType py_str_type) { + CHECK_IAST_INITIALIZED_OR_RETURN(taint_escaped_text); + if (taint_escaped_text == nullptr or ranges_orig.empty()) { return taint_escaped_text; } @@ -250,8 +256,8 @@ convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const Tain id_evidence = get<0>(previous_context); const shared_ptr& original_range = get_range_by_hash(getNum(id_evidence), optional_ranges_orig); - ranges.emplace_back(initializer->allocate_taint_range( - start, length, original_range->source, original_range->secure_marks)); + ranges.emplace_back( + safe_allocate_taint_range(start, length, original_range->source, original_range->secure_marks)); } latest_end = end; } @@ -282,8 +288,8 @@ convert_escaped_text_to_taint_text(const StrType& taint_escaped_text, const Tain id_evidence = get<0>(context); const shared_ptr& original_range = get_range_by_hash(getNum(id_evidence), optional_ranges_orig); - ranges.emplace_back(initializer->allocate_taint_range( - start, end - start, original_range->source, original_range->secure_marks)); + ranges.emplace_back( + safe_allocate_taint_range(start, end - start, original_range->source, original_range->secure_marks)); } latest_end = end; } @@ -339,7 +345,7 @@ set_ranges_on_splitted(const py::object& source_str, if (new_length > 0) { item_ranges.emplace_back( - initializer->allocate_taint_range(new_start, new_length, range->source, range->secure_marks)); + safe_allocate_taint_range(new_start, new_length, range->source, range->secure_marks)); } } } @@ -367,7 +373,9 @@ api_set_ranges_on_splitted(const StrType& source_str, bool include_separator, size_t context_id) { - const auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id); + CHECK_IAST_INITIALIZED_OR_RETURN(false); + + const auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id); if (not tx_map or tx_map->empty()) { return false; } @@ -423,8 +431,8 @@ are_all_text_all_ranges(PyObject* candidate_text, tuple api_are_all_text_all_ranges(py::handle& candidate_text, const py::list& parameter_list) { - const auto tx_map = taint_engine_context->get_tainted_object_map_from_list_of_pyobjects( - { candidate_text.ptr(), parameter_list.ptr() }); + const auto tx_map = + safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text.ptr(), parameter_list.ptr() }); if (not tx_map or tx_map->empty()) { return { {}, {} }; } @@ -435,8 +443,8 @@ api_are_all_text_all_ranges(py::handle& candidate_text, const py::list& paramete tuple api_are_all_text_all_ranges(py::handle& candidate_text, const py::tuple& parameter_list) { - const auto tx_map = taint_engine_context->get_tainted_object_map_from_list_of_pyobjects( - { candidate_text.ptr(), parameter_list.ptr() }); + const auto tx_map = + safe_get_tainted_object_map_from_list_of_pyobjects({ candidate_text.ptr(), parameter_list.ptr() }); if (not tx_map or tx_map->empty()) { return { {}, {} }; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.h b/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.h index a58891ebc7f..f7d0db1b6eb 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.h +++ b/ddtrace/appsec/_iast/_taint_tracking/aspects/helpers.h @@ -8,17 +8,16 @@ #include #include +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "api/utils.h" #include "constants.h" -#include "context/taint_engine_context.h" -#include "initializer/initializer.h" #include "structmember.h" #include "taint_tracking/source.h" #include "taint_tracking/taint_range.h" +#include "utils/python_error_guard.h" #include "utils/string_utils.h" -using namespace pybind11::literals; -namespace py = pybind11; - // Calls the specified method and applies the same ranges to the result. Used // for wrapping simple methods that doesn't change the string size like upper(), // lower() and similar. @@ -51,7 +50,7 @@ template StrType int_as_formatted_evidence(const StrType& text, TaintRangeRefs& text_ranges, TagMappingMode tag_mapping_mode) { - if (const auto tx_map = taint_engine_context->get_tainted_object_map(text.ptr()); !tx_map) { + if (const auto tx_map = safe_get_tainted_object_map(text.ptr()); !tx_map) { return text; } return StrType(as_formatted_evidence(AnyTextObjectToString(text), text_ranges, tag_mapping_mode, nullopt)); diff --git a/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.cpp b/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.cpp index 397b43d7b88..657b5b6f9ff 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.cpp @@ -108,6 +108,12 @@ TaintEngineContext::clear_all_request_context_slots() } } +void +TaintEngineContext::clear_tainted_object_map() +{ + request_context_slots.clear(); +} + TaintedObjectMapTypePtr TaintEngineContext::get_tainted_object_map(PyObject* obj) { @@ -305,23 +311,60 @@ TaintEngineContext::get_tainted_object_map_by_ctx_id(size_t ctx_id) void pyexport_taint_engine_context(py::module& m) { - m.def("finish_request_context", [](size_t ctx_id) { taint_engine_context->finish_request_context(ctx_id); }); - m.def("start_request_context", [] { return taint_engine_context->start_request_context(); }); - m.def("clear_all_request_context_slots", [] { return taint_engine_context->clear_all_request_context_slots(); }); - m.def("get_tainted_object_map_by_ctx_id", - [](size_t ctx_id) { return taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id) != nullptr; }); + m.def("finish_request_context", [](size_t ctx_id) { + if (!taint_engine_context) + return; + taint_engine_context->finish_request_context(ctx_id); + }); + + m.def("start_request_context", []() -> std::optional { + if (!taint_engine_context) + return std::nullopt; + return taint_engine_context->start_request_context(); + }); + + m.def("clear_all_request_context_slots", [] { + if (!taint_engine_context) + return; + taint_engine_context->clear_all_request_context_slots(); + }); + + m.def("get_tainted_object_map_by_ctx_id", [](size_t ctx_id) { + if (!taint_engine_context) + return false; + return taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id) != nullptr; + }); + m.def("is_in_taint_map", [](py::object tainted_obj) { + if (!taint_engine_context) + return false; auto map_ptr = taint_engine_context->get_tainted_object_map(tainted_obj.ptr()); return map_ptr != nullptr; }); - m.def("debug_context_array_size", [] { return taint_engine_context->debug_context_array_size(); }); - m.def("debug_context_array_free_slots_number", - [] { return taint_engine_context->debug_context_array_free_slots_number(); }); - m.def("debug_taint_map", [](size_t ctx_id) { return taint_engine_context->debug_taint_map(ctx_id); }); + m.def("debug_context_array_size", [] { + if (!taint_engine_context) + return static_cast(0); + return taint_engine_context->debug_context_array_size(); + }); - m.def("debug_num_tainted_objects", - [](size_t ctx_id) { return taint_engine_context->debug_num_tainted_objects(ctx_id); }); + m.def("debug_context_array_free_slots_number", [] { + if (!taint_engine_context) + return static_cast(0); + return taint_engine_context->debug_context_array_free_slots_number(); + }); + + m.def("debug_taint_map", [](size_t ctx_id) { + if (!taint_engine_context) + return std::string(""); + return taint_engine_context->debug_taint_map(ctx_id); + }); + + m.def("debug_num_tainted_objects", [](size_t ctx_id) -> int { + if (!taint_engine_context) + return 0; + return taint_engine_context->debug_num_tainted_objects(ctx_id); + }); } std::unique_ptr taint_engine_context; diff --git a/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.h b/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.h index 5a083ec0e23..75f90661416 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.h +++ b/ddtrace/appsec/_iast/_taint_tracking/context/taint_engine_context.h @@ -93,6 +93,10 @@ class TaintEngineContext // Clear all maps and free all slots. void clear_all_request_context_slots(); + // Fork-safe: Abandon all slots without cleanup (for post-fork child process). + // This skips calling destructors/cleanup on slots containing invalid PyObject pointers. + void clear_tainted_object_map(); + // Convenience: retrieve the tainted object directly if present in any // active map; nullptr otherwise. TaintedObjectPtr get_tainted_object_from_request_context_slot(PyObject* tainted_object); diff --git a/ddtrace/appsec/_iast/_taint_tracking/native.cpp b/ddtrace/appsec/_iast/_taint_tracking/native.cpp index 61a10e2ca8e..1d30d03299f 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/native.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/native.cpp @@ -6,6 +6,7 @@ * - Taint ranges: Information related to tainted values. */ #include +#include #include #include "aspects/aspect_extend.h" @@ -62,12 +63,140 @@ static struct PyModuleDef ops __attribute__((used)) = { PyModuleDef_HEAD_INIT, .m_methods = OpsMethods }; /** - * This function initializes the native module. + * Initialize or reinitialize the native IAST global state. + * + * This creates fresh instances of the global singletons: + * - initializer: manages memory pools for taint ranges and objects + * - taint_engine_context: manages per-request taint maps + * + * IMPORTANT: This function is idempotent. If already initialized, it does nothing. + * This ensures that calling it multiple times doesn't destroy existing taint state. + * + * Can be called: + * - At module load time (automatic) + * - From Python explicitly (for manual control) + * - Multiple times safely (idempotent) */ -PYBIND11_MODULE(_native, m) +static void +initialize_native_state() { + // Idempotent: only initialize if not already initialized + // This preserves existing taint state when called multiple times + if (initializer && taint_engine_context) { + return; // Already initialized, nothing to do + } + + // Create fresh instances of global singletons initializer = make_unique(); taint_engine_context = make_unique(); +} + +/** + * Reset native IAST state after fork. + * + * This function safely resets all C++ global state that may have been + * inherited from the parent process during fork. It: + * + * 1. Clears all taint maps (they contain stale PyObject pointers) + * 2. Resets the taint_engine_context (recreates the context array) + * 3. Resets the initializer (recreates memory pools) + * + * This prevents crashes from: + * - Dereferencing stale PyObject pointers from parent + * - Using corrupted shared_ptr control blocks + * - Accessing invalid memory in std::vector/map internals + * + * CRITICAL IMPLEMENTATION DETAIL: unique_ptr.release() vs unique_ptr.reset() + * ------------------------------------------------------------------------- + * This function uses unique_ptr::release() instead of unique_ptr::reset() for + * a specific technical reason: + * + * - unique_ptr::reset(): Deletes the managed object by calling its destructor, + * then sets the pointer to nullptr. The destructor will try to clean up all + * resources including PyObject pointers. + * + * - unique_ptr::release(): Returns the raw pointer and releases ownership + * WITHOUT calling the destructor. The pointer becomes the caller's + * responsibility, but we intentionally leak it. + * + * WHY WE USE release() (intentional memory leak): + * ----------------------------------------------- + * After fork, the child process inherits copies of the parent's C++ objects + * via copy-on-write. These objects contain PyObject pointers that reference + * Python objects in the PARENT process's memory space. These pointers are + * now INVALID in the child process because: + * 1. Python's interpreter state is separate in the child + * 2. The PyObject addresses point to the parent's address space + * 3. Accessing them causes segmentation faults + * + * If we called .reset() (destructor path): + * - The TaintEngineContext destructor would iterate over all taint maps + * - It would try to decrement refcounts on PyObject pointers + * - This would access invalid memory → SEGFAULT + * + * By calling .release() (leak path): + * - We abandon the old objects without cleanup + * - No destructors run, so no invalid PyObject pointers are accessed + * - The memory remains allocated but unused (leaked) + * + * WHY THIS LEAK IS ACCEPTABLE: + * --------------------------- + * 1. Child process lifetime: These child processes are ephemeral (short-lived + * workers or test processes) and will exit soon, releasing all memory + * 2. Memory was COW: The leaked memory was copy-on-write from the parent, so + * it's a small cost compared to the parent's memory footprint + * 3. Alternative is crash: The only alternative is a guaranteed segfault, + * making the leak the lesser evil + * 4. One-time cost: This leak happens exactly once per child process at fork + * time, not repeatedly + * + * Trade-off: Small one-time memory leak vs guaranteed segmentation fault + * + * Called automatically via pthread_atfork in child processes. + */ +static void +reset_native_state_after_fork() +{ + // Important: We're in the child process after fork. + // The Python GIL is held by the forking thread in the child. + + // WARNING: The inherited context contains PyObject pointers from the PARENT process. + // These pointers are now invalid in the child and must not be dereferenced. + // + // CRITICAL: We must NOT call destructors on the inherited objects because they will + // try to clean up PyObject pointers that are invalid in the child process. + // Instead, we intentionally "leak" the old objects by abandoning them without cleanup. + // This is safe because: + // 1. We're in a child process that will exit eventually + // 2. The memory was copy-on-write from the parent + // 3. Calling destructors would cause segfaults + + // Step 1: Abandon the old pointers WITHOUT calling destructors + // Using release() transfers ownership and returns the raw pointer without cleanup + // We intentionally leak these objects to avoid segfaults from destructor cleanup + if (taint_engine_context) { + taint_engine_context->clear_tainted_object_map(); + (void)taint_engine_context.release(); // Leak the old object + } + if (initializer) { + (void)initializer.release(); // Leak the old object + } + + // Step 2: Recreate fresh instances + initialize_native_state(); +} + +/** + * This function initializes the native module. + */ +PYBIND11_MODULE(_native, m) +{ + // Register pthread_atfork handler to reset state in child processes + // This provides automatic fork-safety: + // - prepare: NULL (nothing to do before fork in parent) + // - parent: NULL (nothing to do after fork in parent) + // - child: reset_native_state_after_fork (reset globals in child) + pthread_atfork(nullptr, nullptr, reset_native_state_after_fork); // Create a atexit callback to cleanup the Initializer before the interpreter finishes auto atexit_register = safe_import("atexit", "register"); @@ -104,6 +233,19 @@ PYBIND11_MODULE(_native, m) pyexport_m_aspect_helpers(m); + // Export fork-safety functions + m.def("reset_native_state", + &reset_native_state_after_fork, + "Reset native IAST state after fork. " + "This recreates all global C++ singletons with fresh state, " + "clearing any inherited PyObject pointers or corrupted memory from parent process."); + + m.def("initialize_native_state", + &initialize_native_state, + "Explicitly initialize native IAST state. " + "Normally called automatically at module load, but can be called manually " + "from Python for explicit initialization control."); + // Note: the order of these definitions matter. For example, // stacktrace_element definitions must be before the ones of the // classes inheriting from it. diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.cpp b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.cpp index 3031cfa4012..5bb34de3abd 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.cpp @@ -1,6 +1,6 @@ #include "taint_range.h" -#include "context/taint_engine_context.h" -#include "initializer/initializer.h" +#include "api/safe_context.h" +#include "api/safe_initializer.h" namespace py = pybind11; @@ -59,11 +59,17 @@ TaintRange::has_origin(OriginType origin) const TaintRangePtr shift_taint_range(const TaintRangePtr& source_taint_range, const RANGE_START offset, const RANGE_LENGTH new_length = -1) { + // CRITICAL: Check if the shared_ptr is valid before accessing its members. + // After fork or during cleanup, taint ranges can become null/empty. + if (!source_taint_range) { + return nullptr; + } + const auto new_length_to_use = new_length == -1 ? source_taint_range->length : new_length; - auto tptr = initializer->allocate_taint_range(source_taint_range->start + offset, - new_length_to_use, - source_taint_range->source, - source_taint_range->secure_marks); + auto tptr = safe_allocate_taint_range(source_taint_range->start + offset, + new_length_to_use, + source_taint_range->source, + source_taint_range->secure_marks); return tptr; } @@ -76,6 +82,11 @@ shift_taint_ranges(const TaintRangeRefs& source_taint_ranges, new_ranges.reserve(source_taint_ranges.size()); for (const auto& trange : source_taint_ranges) { + // Skip ONLY if the input range itself is null/empty (data corruption from fork/cleanup). + // Do NOT skip if allocation fails - that should propagate as nullptr to caller. + if (!trange) { + continue; // Skip corrupted/null input ranges + } new_ranges.emplace_back(shift_taint_range(trange, offset, new_length)); } return new_ranges; @@ -92,7 +103,11 @@ api_shift_taint_ranges(const TaintRangeRefs& source_taint_ranges, bool api_set_ranges(py::handle& str, const TaintRangeRefs& ranges, const size_t contextid) { - const auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(contextid); + if (!taint_engine_context) { + return false; + } + + const auto tx_map = safe_get_tainted_object_map_by_ctx_id(contextid); if (not tx_map) { return false; } @@ -124,6 +139,15 @@ api_set_ranges(py::handle& str, const TaintRangeRefs& ranges, const size_t conte PyObject* api_taint_pyobject(PyObject* self, PyObject* const* args, const Py_ssize_t nargs) { + if (!taint_engine_context) { + // Return the original object unchanged if context is not initialized + if (nargs >= 1) { + return args[0]; + } + PyErr_SetString(PyExc_RuntimeError, "IAST not initialized"); + return nullptr; + } + bool result = false; const char* result_error_msg = MSG_ERROR_N_PARAMS; PyObject* pyobject_n = nullptr; @@ -132,7 +156,7 @@ api_taint_pyobject(PyObject* self, PyObject* const* args, const Py_ssize_t nargs PyObject* tainted_object = args[0]; PyObject* ctx_obj = args[5]; size_t context_id = PyLong_AsSize_t(ctx_obj); - const auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id); + const auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id); if (not tx_map) { return tainted_object; } @@ -145,7 +169,7 @@ api_taint_pyobject(PyObject* self, PyObject* const* args, const Py_ssize_t nargs if (const string source_value = PyObjectToString(args[3]); not source_value.empty()) { const auto source_origin = static_cast(PyLong_AsLong(args[4])); const auto source = Source(source_name, source_value, source_origin); - const auto range = initializer->allocate_taint_range(0, len_pyobject, source, {}); + const auto range = safe_allocate_taint_range(0, len_pyobject, source, {}); const auto ranges = vector{ range }; result = set_ranges(pyobject_n, ranges, tx_map); if (not result) { @@ -206,7 +230,7 @@ set_ranges(PyObject* str, const TaintRangeRefs& ranges, const TaintedObjectMapTy } auto obj_id = get_unique_id(str); const auto it = tx_map->find(obj_id); - auto new_tainted_object = initializer->allocate_ranges_into_taint_object(ranges); + auto new_tainted_object = safe_allocate_ranges_into_taint_object(ranges); set_fast_tainted_if_notinterned_unicode(str); if (it != tx_map->end()) { @@ -240,7 +264,11 @@ get_range_by_hash(const size_t range_hash, optional& taint_range TaintRangeRefs api_get_ranges(const py::handle& string_input) { - const auto tx_map = taint_engine_context->get_tainted_object_map(string_input.ptr()); + if (!taint_engine_context) { + return {}; + } + + const auto tx_map = safe_get_tainted_object_map(string_input.ptr()); if (not tx_map or tx_map->empty()) { return {}; @@ -256,7 +284,11 @@ api_get_ranges(const py::handle& string_input) void api_copy_ranges_from_strings(py::handle& str_1, py::handle& str_2) { - const auto tx_map = taint_engine_context->get_tainted_object_map(str_1.ptr()); + if (!taint_engine_context) { + return; + } + + const auto tx_map = safe_get_tainted_object_map(str_1.ptr()); if (not tx_map) { py::set_error(PyExc_ValueError, MSG_ERROR_TAINT_MAP); @@ -279,7 +311,11 @@ api_copy_and_shift_ranges_from_strings(py::handle& str_1, const int offset, const int new_length = -1) { - const auto tx_map = taint_engine_context->get_tainted_object_map(str_1.ptr()); + if (!taint_engine_context) { + return; + } + + const auto tx_map = safe_get_tainted_object_map(str_1.ptr()); if (not tx_map) { py::set_error(PyExc_ValueError, MSG_ERROR_TAINT_MAP); return; @@ -438,7 +474,7 @@ pyexport_taintrange(py::module& m) } else if (py::hasattr(secure_marks, "value")) { marks = secure_marks.attr("value").cast(); } - return initializer->allocate_taint_range(start, length, source, marks); + return safe_allocate_taint_range(start, length, source, marks); }, "start"_a, "length"_a, diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.h b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.h index 9add6c43974..e0598762666 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.h +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/taint_range.h @@ -6,6 +6,7 @@ #include "structmember.h" +#include "api/utils.h" #include "constants.h" #include "taint_tracking/source.h" #include "utils/string_utils.h" diff --git a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp index e72f2f1bbf1..81c2d1d0cc7 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/taint_tracking/tainted_object.cpp @@ -1,3 +1,4 @@ +#include "api/safe_initializer.h" #include "initializer/initializer.h" namespace py = pybind11; @@ -22,8 +23,7 @@ allocate_limited_taint_range_with_offset(const TaintRangePtr& source_taint_range else length = source_taint_range->length; - auto tptr = - initializer->allocate_taint_range(offset, length, source_taint_range->source, source_taint_range->secure_marks); + auto tptr = safe_allocate_taint_range(offset, length, source_taint_range->source, source_taint_range->secure_marks); return tptr; } @@ -35,10 +35,10 @@ allocate_limited_taint_range_with_offset(const TaintRangePtr& source_taint_range TaintRangePtr shift_taint_range(const TaintRangePtr& source_taint_range, const RANGE_START offset) { - auto tptr = initializer->allocate_taint_range(source_taint_range->start + offset, - source_taint_range->length, - source_taint_range->source, - source_taint_range->secure_marks); + auto tptr = safe_allocate_taint_range(source_taint_range->start + offset, + source_taint_range->length, + source_taint_range->source, + source_taint_range->secure_marks); return tptr; } diff --git a/ddtrace/appsec/_iast/_taint_tracking/tainted_ops/tainted_ops.cpp b/ddtrace/appsec/_iast/_taint_tracking/tainted_ops/tainted_ops.cpp index 675b2419c68..267d22380d8 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tainted_ops/tainted_ops.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tainted_ops/tainted_ops.cpp @@ -24,6 +24,10 @@ is_tainted(PyObject* tainted_object, const TaintedObjectMapTypePtr& tx_taint_map bool api_is_tainted(py::object tainted_object) { + if (!taint_engine_context) { + return false; + } + if (tainted_object) { const auto& to_initial = taint_engine_context->get_tainted_object_from_request_context_slot(tainted_object.ptr()); diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp index 2391c26b43c..9abbb30602e 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_helpers.cpp @@ -274,7 +274,7 @@ TEST_F(AsFormattedEvidenceCheckNoContext, NoTaintMapSameString) { const py::str text("This is a test string."); // With no request context and no ranges, the output should equal the input - const auto tx_map = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper, tx_map); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); } @@ -373,7 +373,7 @@ using AllAsFormattedEvidenceCheckNoContext = PyEnvCheck; TEST_F(AllAsFormattedEvidenceCheckNoContext, NoTaintMapSameString) { const py::str text("This is a test string."); - const auto tx_map = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper, tx_map); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); } @@ -381,7 +381,7 @@ TEST_F(AllAsFormattedEvidenceCheckNoContext, NoTaintMapSameString) TEST_F(AllAsFormattedEvidenceCheck, NoRangesSameString) { const py::str text("This is a test string."); - const auto tx_map = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper, tx_map); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); } @@ -399,7 +399,7 @@ TEST_F(AllAsFormattedEvidenceCheck, SingleTaintRangeWithNormalMapper) api_set_ranges(text, taint_ranges, context_id.value()); const py::str expected_result("This :+-is-+: a test string."); - const auto tx_map_after = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map_after = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Normal, tx_map_after); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); @@ -420,7 +420,7 @@ TEST_F(AllAsFormattedEvidenceCheck, SingleTaintRangeWithMapper) auto taint_range_1_hash = taint_ranges[0]->get_hash(); const py::str expected_result("This :+-<" + std::to_string(taint_range_1_hash) + ">is<" + std::to_string(taint_range_1_hash) + ">-+: a test string."); - const auto tx_map_after = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map_after = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper, tx_map_after); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); @@ -444,7 +444,7 @@ TEST_F(AllAsFormattedEvidenceCheck, DISABLED_SingleTaintRangeWithMapperReplace) new_ranges[py::cast(taint_ranges[0])] = new_range; const py::str expected_result("This :+-is-+: a test string."); - const auto tx_map_after3 = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map_after3 = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper_Replace, tx_map_after3); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(expected_result).c_str()); @@ -453,7 +453,7 @@ TEST_F(AllAsFormattedEvidenceCheck, DISABLED_SingleTaintRangeWithMapperReplace) TEST_F(AllAsFormattedEvidenceCheck, EmptyText) { const py::str text(""); - const auto tx_map = taint_engine_context->get_tainted_object_map(text.ptr()); + const auto tx_map = safe_get_tainted_object_map(text.ptr()); const py::str result = all_as_formatted_evidence(text, TagMappingMode::Mapper, tx_map); EXPECT_STREQ(AnyTextObjectToString(result).c_str(), AnyTextObjectToString(text).c_str()); } @@ -574,7 +574,7 @@ TEST_F(SetRangesOnSplittedCheck, EmptySourceAndSplit) auto ctx = taint_engine_context->start_request_context(); ASSERT_TRUE(ctx.has_value()); this->context_id = ctx; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); EXPECT_FALSE(result); @@ -601,7 +601,7 @@ TEST_F(SetRangesOnSplittedCheck, SingleSplitWithoutSeparator) this->context_id = ctx; api_set_ranges(source_str, source_ranges, context_id.value()); - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); EXPECT_TRUE(result); @@ -641,7 +641,7 @@ TEST_F(SetRangesOnSplittedCheck, MultipleSplitsNoSeparator) this->context_id = ctx; api_set_ranges(source_str, source_ranges, context_id.value()); - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); EXPECT_TRUE(result); @@ -693,7 +693,7 @@ TEST_F(SetRangesOnSplittedCheck, SplitWithSeparatorIncluded) this->context_id = ctx; api_set_ranges(source_str, source_ranges, context_id.value()); - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, true); EXPECT_TRUE(result); @@ -734,7 +734,7 @@ TEST_F(SetRangesOnSplittedCheck, EmptyRanges) auto ctx = taint_engine_context->start_request_context(); ASSERT_TRUE(ctx.has_value()); this->context_id = ctx; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); bool result = set_ranges_on_splitted(source_str, source_ranges, split_result, tx_map, false); EXPECT_FALSE(result); diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp index a91e7b11502..0da880b2974 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_index_aspect.cpp @@ -8,7 +8,7 @@ using AspectIndexCheck = PyEnvWithContext; TEST_F(AspectIndexCheck, check_index_internal_all_nullptr) { const TaintRangeRefs refs; - const auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + const auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); index_aspect(nullptr, nullptr, nullptr, refs, tx_map); } @@ -16,7 +16,7 @@ TEST_F(AspectIndexCheck, check_index_internal_all_nullptr_negative_index) { PyObject* idx = PyLong_FromLong(-1); const TaintRangeRefs refs; - const auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(context_id.value()); + const auto tx_map = safe_get_tainted_object_map_by_ctx_id(context_id.value()); auto ret = index_aspect(nullptr, nullptr, idx, refs, tx_map); EXPECT_EQ(ret, nullptr); Py_DecRef(idx); diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_modulo_aspect.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_modulo_aspect.cpp index c85b4ef974f..5e6ac3e7bf7 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_modulo_aspect.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_modulo_aspect.cpp @@ -64,14 +64,14 @@ TEST_F(AspectModuloCheck, check_api_modulo_aspect_float_parameter_with_tainted_t // Create a context and taint the template prefix to simulate tainted template without percent in the tainted range this->context_id = taint_engine_context->start_request_context(); ASSERT_TRUE(this->context_id.has_value()); - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(this->context_id.value()); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(this->context_id.value()); PyObject* candidate_text = this->StringToPyObjectStr("template %0.2f"); PyObject* param = PyFloat_FromDouble(3.14159); // Taint the word "template" (positions 0..8) auto source = Source("input1", "template", OriginType::PARAMETER); - auto range = initializer->allocate_taint_range(0, 8, source, {}); + auto range = safe_allocate_taint_range(0, 8, source, {}); TaintRangeRefs ranges{ range }; ASSERT_TRUE(set_ranges(candidate_text, ranges, tx_map)); diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_safe_wrappers.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_safe_wrappers.cpp new file mode 100644 index 00000000000..d0df4e6a385 --- /dev/null +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_safe_wrappers.cpp @@ -0,0 +1,272 @@ +#include +#include +#include + +#include "api/safe_context.h" +#include "api/safe_initializer.h" +#include "context/taint_engine_context.h" +#include "initializer/initializer.h" +#include "taint_tracking/taint_range.h" +#include "taint_tracking/tainted_object.h" + +namespace py = pybind11; + +/** + * Test suite for safe wrapper functions that handle uninitialized global pointers. + * + * These tests verify that the safe_* wrapper functions correctly handle the case + * when taint_engine_context or initializer are nullptr (before initialize_native_state() + * is called), preventing segmentation faults. + */ + +// ============================================================================ +// Test safe_context wrapper functions +// ============================================================================ + +class SafeContextWrappersTest : public ::testing::Test +{ + protected: + void SetUp() override + { + // Intentionally do NOT initialize globals - test nullptr handling + taint_engine_context.reset(); + initializer.reset(); + } + + void TearDown() override + { + // Clean up + taint_engine_context.reset(); + initializer.reset(); + } +}; + +TEST_F(SafeContextWrappersTest, SafeGetTaintedObjectMap_ReturnsNullptr_WhenContextIsNull) +{ + py::str test_str("test"); + auto result = safe_get_tainted_object_map(test_str.ptr()); + EXPECT_EQ(result, nullptr); +} + +TEST_F(SafeContextWrappersTest, SafeGetTaintedObjectMapByCtxId_ReturnsNullptr_WhenContextIsNull) +{ + auto result = safe_get_tainted_object_map_by_ctx_id(0); + EXPECT_EQ(result, nullptr); +} + +TEST_F(SafeContextWrappersTest, SafeGetTaintedObjectMapFromList_ReturnsNullptr_WhenContextIsNull) +{ + py::str str1("str1"); + py::str str2("str2"); + std::vector objects = { str1.ptr(), str2.ptr() }; + auto result = safe_get_tainted_object_map_from_list_of_pyobjects(objects); + EXPECT_EQ(result, nullptr); +} + +// ============================================================================ +// Test safe_initializer wrapper functions +// ============================================================================ + +class SafeInitializerWrappersTest : public ::testing::Test +{ + protected: + void SetUp() override + { + // Intentionally do NOT initialize globals - test nullptr handling + taint_engine_context.reset(); + initializer.reset(); + } + + void TearDown() override + { + // Clean up + taint_engine_context.reset(); + initializer.reset(); + } +}; + +TEST_F(SafeInitializerWrappersTest, SafeAllocateTaintRange_ReturnsNullptr_WhenInitializerIsNull) +{ + Source src("test", "value", OriginType::PARAMETER); + auto result = safe_allocate_taint_range(0, 10, src, 0); + EXPECT_EQ(result, nullptr); +} + +TEST_F(SafeInitializerWrappersTest, SafeAllocateTaintedObject_ReturnsNullptr_WhenInitializerIsNull) +{ + auto result = safe_allocate_tainted_object(); + EXPECT_EQ(result, nullptr); +} + +TEST_F(SafeInitializerWrappersTest, SafeAllocateTaintedObjectCopy_ReturnsNullptr_WhenInitializerIsNull) +{ + auto dummy = std::make_shared(); + auto result = safe_allocate_tainted_object_copy(dummy); + EXPECT_EQ(result, nullptr); +} + +TEST_F(SafeInitializerWrappersTest, SafeAllocateRangesIntoTaintObject_ReturnsNullptr_WhenInitializerIsNull) +{ + TaintRangeRefs ranges; + auto result = safe_allocate_ranges_into_taint_object(ranges); + EXPECT_EQ(result, nullptr); +} + +TEST_F(SafeInitializerWrappersTest, SafeAllocateRangesIntoTaintObjectCopy_ReturnsNullptr_WhenInitializerIsNull) +{ + TaintRangeRefs ranges; + auto result = safe_allocate_ranges_into_taint_object_copy(ranges); + EXPECT_EQ(result, nullptr); +} + +// ============================================================================ +// Test that safe wrappers work correctly when globals ARE initialized +// ============================================================================ + +class SafeWrappersWithInitializationTest : public ::testing::Test +{ + protected: + void SetUp() override + { + // Initialize globals properly + initializer = std::make_unique(); + taint_engine_context = std::make_unique(); + taint_engine_context->clear_all_request_context_slots(); + } + + void TearDown() override + { + if (taint_engine_context) { + taint_engine_context->clear_all_request_context_slots(); + } + taint_engine_context.reset(); + initializer.reset(); + } +}; + +TEST_F(SafeWrappersWithInitializationTest, SafeGetTaintedObjectMapByCtxId_ReturnsMap_WhenInitialized) +{ + auto idx_opt = taint_engine_context->start_request_context(); + ASSERT_TRUE(idx_opt.has_value()); + const auto ctx_id = *idx_opt; + + auto result = safe_get_tainted_object_map_by_ctx_id(ctx_id); + EXPECT_NE(result, nullptr); +} + +TEST_F(SafeWrappersWithInitializationTest, SafeAllocateTaintRange_WorksCorrectly_WhenInitialized) +{ + Source src("test_source", "test_value", OriginType::PARAMETER); + auto result = safe_allocate_taint_range(0, 10, src, 0); + ASSERT_NE(result, nullptr); + EXPECT_EQ(result->start, 0); + EXPECT_EQ(result->length, 10); + EXPECT_EQ(result->source.name, "test_source"); + EXPECT_EQ(result->source.value, "test_value"); +} + +TEST_F(SafeWrappersWithInitializationTest, SafeAllocateTaintedObject_WorksCorrectly_WhenInitialized) +{ + auto result = safe_allocate_tainted_object(); + ASSERT_NE(result, nullptr); + EXPECT_TRUE(result->get_ranges().empty()); +} + +TEST_F(SafeWrappersWithInitializationTest, SafeAllocateRangesIntoTaintObject_WorksCorrectly_WhenInitialized) +{ + // Create some ranges + TaintRangeRefs ranges; + Source src1("src1", "val1", OriginType::PARAMETER); + Source src2("src2", "val2", OriginType::PARAMETER); + + auto range1 = safe_allocate_taint_range(0, 5, src1, 0); + auto range2 = safe_allocate_taint_range(5, 5, src2, 0); + + ranges.push_back(range1); + ranges.push_back(range2); + + auto result = safe_allocate_ranges_into_taint_object(ranges); + ASSERT_NE(result, nullptr); + EXPECT_EQ(result->get_ranges().size(), 2); + EXPECT_EQ(result->get_ranges()[0]->source.name, "src1"); + EXPECT_EQ(result->get_ranges()[1]->source.name, "src2"); +} + +TEST_F(SafeWrappersWithInitializationTest, SafeAllocateRangesIntoTaintObjectCopy_WorksCorrectly_WhenInitialized) +{ + // Create some ranges + TaintRangeRefs ranges; + Source src("source", "value", OriginType::PARAMETER); + auto range = safe_allocate_taint_range(0, 10, src, 0); + ranges.push_back(range); + + auto result = safe_allocate_ranges_into_taint_object_copy(ranges); + ASSERT_NE(result, nullptr); + EXPECT_EQ(result->get_ranges().size(), 1); + EXPECT_EQ(result->get_ranges()[0]->source.name, "source"); + + // Verify it's a copy - original ranges should still be valid + EXPECT_EQ(ranges.size(), 1); + EXPECT_NE(ranges[0], nullptr); +} + +// ============================================================================ +// Test initialize_native_state and reset_native_state behavior +// ============================================================================ + +class InitializeNativeStateTest : public ::testing::Test +{ + protected: + void SetUp() override + { + // Ensure clean state + taint_engine_context.reset(); + initializer.reset(); + } + + void TearDown() override + { + // Clean up after each test + taint_engine_context.reset(); + initializer.reset(); + } +}; + +TEST_F(InitializeNativeStateTest, GlobalsAreNull_BeforeInitialization) +{ + EXPECT_EQ(taint_engine_context, nullptr); + EXPECT_EQ(initializer, nullptr); +} + +TEST_F(InitializeNativeStateTest, GlobalsAreInitialized_AfterInitialization) +{ + // Initialize + initializer = std::make_unique(); + taint_engine_context = std::make_unique(); + + EXPECT_NE(taint_engine_context, nullptr); + EXPECT_NE(initializer, nullptr); +} + +TEST_F(InitializeNativeStateTest, CanCreateRequestContext_AfterInitialization) +{ + // Initialize + initializer = std::make_unique(); + taint_engine_context = std::make_unique(); + + auto idx_opt = taint_engine_context->start_request_context(); + EXPECT_TRUE(idx_opt.has_value()); +} + +TEST_F(InitializeNativeStateTest, SafeWrappersReturnNull_BeforeInitialization) +{ + // Verify safe wrappers handle nullptr gracefully + py::str test_str("test"); + + EXPECT_EQ(safe_get_tainted_object_map(test_str.ptr()), nullptr); + EXPECT_EQ(safe_get_tainted_object_map_by_ctx_id(0), nullptr); + + Source src("test", "value", OriginType::PARAMETER); + EXPECT_EQ(safe_allocate_taint_range(0, 10, src, 0), nullptr); + EXPECT_EQ(safe_allocate_tainted_object(), nullptr); +} diff --git a/ddtrace/appsec/_iast/_taint_tracking/tests/test_taint_engine_context.cpp b/ddtrace/appsec/_iast/_taint_tracking/tests/test_taint_engine_context.cpp index 8f0832f28c3..f891fb22655 100644 --- a/ddtrace/appsec/_iast/_taint_tracking/tests/test_taint_engine_context.cpp +++ b/ddtrace/appsec/_iast/_taint_tracking/tests/test_taint_engine_context.cpp @@ -2,6 +2,7 @@ #include #include +#include "api/safe_context.h" #include "context/taint_engine_context.h" #include "taint_tracking/taint_range.h" #include "taint_tracking/tainted_object.h" @@ -51,7 +52,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_ListScan_NoRefcountLeak_With auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto ctx_id = *idx_opt; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(ctx_id); ASSERT_NE(tx_map, nullptr); py::str a("a"); @@ -71,7 +72,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_ListScan_NoRefcountLeak_With const long b_before = refcnt(b.ptr()); const long t_before = refcnt(tainted_str.ptr()); - auto m = taint_engine_context->get_tainted_object_map(lst.ptr()); + auto m = safe_get_tainted_object_map(lst.ptr()); ASSERT_NE(m, nullptr); ASSERT_EQ(refcnt(lst.ptr()), lst_before); @@ -98,7 +99,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_ListScan_NoRefcountLeak_NoMa const long b_before = refcnt(b.ptr()); const long c_before = refcnt(c.ptr()); - auto m = taint_engine_context->get_tainted_object_map(lst.ptr()); + auto m = safe_get_tainted_object_map(lst.ptr()); ASSERT_EQ(m, nullptr); ASSERT_EQ(refcnt(lst.ptr()), lst_before); @@ -112,7 +113,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_TupleScan_NoRefcountLeak_Wit auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto ctx_id = *idx_opt; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(ctx_id); ASSERT_NE(tx_map, nullptr); py::str x("x"); @@ -131,7 +132,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_TupleScan_NoRefcountLeak_Wit const long y_before = refcnt(y.ptr()); const long t_before = refcnt(tainted_str.ptr()); - auto m = taint_engine_context->get_tainted_object_map(tup.ptr()); + auto m = safe_get_tainted_object_map(tup.ptr()); ASSERT_NE(m, nullptr); ASSERT_EQ(refcnt(tup.ptr()), tup_before); @@ -145,7 +146,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_DictScan_NoRefcountLeak_With auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto ctx_id = *idx_opt; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(ctx_id); ASSERT_NE(tx_map, nullptr); py::str key1("k1"); @@ -165,7 +166,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_DictScan_NoRefcountLeak_With const long v1_before = refcnt(val1.ptr()); const long tv_before = refcnt(tainted_val.ptr()); - auto m = taint_engine_context->get_tainted_object_map(d.ptr()); + auto m = safe_get_tainted_object_map(d.ptr()); ASSERT_NE(m, nullptr); ASSERT_EQ(refcnt(d.ptr()), d_before); @@ -195,7 +196,7 @@ TEST_F(ApplicationContextTest, GetCurrentContextMap_DictScan_NoRefcountLeak_NoMa const long v1_before = refcnt(val1.ptr()); const long v2_before = refcnt(val2.ptr()); - auto m = taint_engine_context->get_tainted_object_map(d.ptr()); + auto m = safe_get_tainted_object_map(d.ptr()); ASSERT_EQ(m, nullptr); ASSERT_EQ(refcnt(d.ptr()), d_before); @@ -210,14 +211,14 @@ TEST_F(ApplicationContextTest, CreateTwoContextsAndRetrieveByIndex) // Create first context auto idx1 = taint_engine_context->start_request_context(); ASSERT_TRUE(idx1.has_value()); - auto m1 = taint_engine_context->get_tainted_object_map_by_ctx_id(*idx1); + auto m1 = safe_get_tainted_object_map_by_ctx_id(*idx1); ASSERT_NE(m1, nullptr); // Create second context, should be a different slot/map auto idx2 = taint_engine_context->start_request_context(); ASSERT_TRUE(idx2.has_value()); ASSERT_NE(*idx1, *idx2); - auto m2 = taint_engine_context->get_tainted_object_map_by_ctx_id(*idx2); + auto m2 = safe_get_tainted_object_map_by_ctx_id(*idx2); ASSERT_NE(m2, nullptr); ASSERT_NE(m1, m2); } @@ -226,11 +227,11 @@ TEST_F(ApplicationContextTest, ClearSpecificMap) { auto idx = taint_engine_context->start_request_context(); ASSERT_TRUE(idx.has_value()); - auto m = taint_engine_context->get_tainted_object_map_by_ctx_id(*idx); + auto m = safe_get_tainted_object_map_by_ctx_id(*idx); ASSERT_NE(m, nullptr); taint_engine_context->finish_request_context(*idx); - auto after = taint_engine_context->get_tainted_object_map_by_ctx_id(*idx); + auto after = safe_get_tainted_object_map_by_ctx_id(*idx); ASSERT_EQ(after, nullptr); } @@ -244,13 +245,13 @@ TEST_F(ApplicationContextTest, ReuseFreedSlotOnCreate) // Free the first slot taint_engine_context->finish_request_context(*idx1); - ASSERT_EQ(taint_engine_context->get_tainted_object_map_by_ctx_id(*idx1), nullptr); + ASSERT_EQ(safe_get_tainted_object_map_by_ctx_id(*idx1), nullptr); // Next create should reuse the first free slot (lowest index first) auto idx3 = taint_engine_context->start_request_context(); ASSERT_TRUE(idx3.has_value()); ASSERT_EQ(*idx3, *idx1); - auto m3 = taint_engine_context->get_tainted_object_map_by_ctx_id(*idx3); + auto m3 = safe_get_tainted_object_map_by_ctx_id(*idx3); ASSERT_NE(m3, nullptr); } @@ -269,7 +270,7 @@ TEST_F(ApplicationContextTest, ClearAllContexts) taint_engine_context->clear_all_request_context_slots(); for (size_t i = 0; i < cap; ++i) { // All slots should be cleared - auto m = taint_engine_context->get_tainted_object_map_by_ctx_id(i); + auto m = safe_get_tainted_object_map_by_ctx_id(i); ASSERT_EQ(m, nullptr); } } @@ -280,7 +281,7 @@ TEST_F(ApplicationContextTest, ClearTaintMapFreesContainedTaintedObjects) auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto ctx_id = *idx_opt; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(ctx_id); ASSERT_NE(tx_map, nullptr); // Insert a TaintedObjectPtr directly into the map with a dummy key/hash @@ -309,7 +310,7 @@ TEST_F(ApplicationContextTest, ClearContextsArrayFreesTaintRangeMap) auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto ctx_id = *idx_opt; - auto tx_map = taint_engine_context->get_tainted_object_map_by_ctx_id(ctx_id); + auto tx_map = safe_get_tainted_object_map_by_ctx_id(ctx_id); ASSERT_NE(tx_map, nullptr); // Take a weak reference to the map itself @@ -332,7 +333,7 @@ TEST_F(ApplicationContextTest, FinishRequestContextWithInvalidIndexIsNoop) // Act: finish an out-of-range index; should not crash taint_engine_context->finish_request_context(cap + 10); // Assert: out-of-range map lookup returns nullptr - auto m = taint_engine_context->get_tainted_object_map_by_ctx_id(cap + 10); + auto m = safe_get_tainted_object_map_by_ctx_id(cap + 10); ASSERT_EQ(m, nullptr); } @@ -341,22 +342,22 @@ TEST_F(ApplicationContextTest, FinishRequestContextTwiceIsNoop) auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto id = *idx_opt; - ASSERT_NE(taint_engine_context->get_tainted_object_map_by_ctx_id(id), nullptr); + ASSERT_NE(safe_get_tainted_object_map_by_ctx_id(id), nullptr); // First finish clears the slot taint_engine_context->finish_request_context(id); - ASSERT_EQ(taint_engine_context->get_tainted_object_map_by_ctx_id(id), nullptr); + ASSERT_EQ(safe_get_tainted_object_map_by_ctx_id(id), nullptr); // Second finish is a no-op taint_engine_context->finish_request_context(id); - ASSERT_EQ(taint_engine_context->get_tainted_object_map_by_ctx_id(id), nullptr); + ASSERT_EQ(safe_get_tainted_object_map_by_ctx_id(id), nullptr); } TEST_F(ApplicationContextTest, GetTaintedObjectMapByInvalidIndexReturnsNull) { const auto cap = taint_engine_context->debug_context_array_size(); - auto m1 = taint_engine_context->get_tainted_object_map_by_ctx_id(cap); - auto m2 = taint_engine_context->get_tainted_object_map_by_ctx_id(cap + 123); + auto m1 = safe_get_tainted_object_map_by_ctx_id(cap); + auto m2 = safe_get_tainted_object_map_by_ctx_id(cap + 123); ASSERT_EQ(m1, nullptr); ASSERT_EQ(m2, nullptr); } @@ -389,11 +390,11 @@ TEST_F(ApplicationContextTest, ClearAllIsIdempotent) auto idx_opt = taint_engine_context->start_request_context(); ASSERT_TRUE(idx_opt.has_value()); const auto id = *idx_opt; - ASSERT_NE(taint_engine_context->get_tainted_object_map_by_ctx_id(id), nullptr); + ASSERT_NE(safe_get_tainted_object_map_by_ctx_id(id), nullptr); taint_engine_context->clear_all_request_context_slots(); - ASSERT_EQ(taint_engine_context->get_tainted_object_map_by_ctx_id(id), nullptr); + ASSERT_EQ(safe_get_tainted_object_map_by_ctx_id(id), nullptr); // Second call should be harmless taint_engine_context->clear_all_request_context_slots(); - ASSERT_EQ(taint_engine_context->get_tainted_object_map_by_ctx_id(id), nullptr); + ASSERT_EQ(safe_get_tainted_object_map_by_ctx_id(id), nullptr); } diff --git a/ddtrace/internal/settings/asm.py b/ddtrace/internal/settings/asm.py index 99c63922718..d8064418630 100644 --- a/ddtrace/internal/settings/asm.py +++ b/ddtrace/internal/settings/asm.py @@ -262,7 +262,7 @@ class ASMConfig(DDConfig): _bypass_instrumentation_for_waf = False # IAST supported on python 3.6 to 3.13 and never on windows - _iast_supported: bool = ((3, 6, 0) <= sys.version_info < (3, 14, 0)) and not ( + _iast_supported: bool = ((3, 6, 0) <= sys.version_info < (3, 15, 0)) and not ( sys.platform.startswith("win") or sys.platform.startswith("cygwin") ) diff --git a/releasenotes/notes/fix-iast-fork-worker-crashes-44a1f14516fb44a2.yaml b/releasenotes/notes/fix-iast-fork-worker-crashes-44a1f14516fb44a2.yaml new file mode 100644 index 00000000000..dc70730df19 --- /dev/null +++ b/releasenotes/notes/fix-iast-fork-worker-crashes-44a1f14516fb44a2.yaml @@ -0,0 +1,6 @@ +--- +fixes: + - | + Code Security: Fixes critical memory safety issue in IAST when used with forked worker processes + (MCP servers with Gunicorn and Uvicorn). Workers previously crashed with segmentation faults due to stale PyObject + pointers in native taint maps after fork. diff --git a/tests/appsec/iast/aspects/conftest.py b/tests/appsec/iast/aspects/conftest.py index 20531169538..6b78469e2d7 100644 --- a/tests/appsec/iast/aspects/conftest.py +++ b/tests/appsec/iast/aspects/conftest.py @@ -1,5 +1,6 @@ import pytest +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from tests.appsec.iast.iast_utils import _end_iast_context_and_oce from tests.appsec.iast.iast_utils import _start_iast_context_and_oce from tests.utils import override_global_config @@ -10,6 +11,7 @@ def iast_request(): with override_global_config( dict(_iast_enabled=True, _iast_is_testing=True, _iast_deduplication_enabled=False, _iast_request_sampling=100.0) ): + initialize_native_state() context_id = _start_iast_context_and_oce() assert context_id is not None try: diff --git a/tests/appsec/iast/aspects/test_aspect_helpers.py b/tests/appsec/iast/aspects/test_aspect_helpers.py index fbe3b910a2c..5788779a492 100644 --- a/tests/appsec/iast/aspects/test_aspect_helpers.py +++ b/tests/appsec/iast/aspects/test_aspect_helpers.py @@ -10,6 +10,7 @@ from ddtrace.appsec._iast._taint_tracking import as_formatted_evidence from ddtrace.appsec._iast._taint_tracking import common_replace from ddtrace.appsec._iast._taint_tracking import get_ranges +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking import set_ranges from ddtrace.appsec._iast._taint_tracking import set_ranges_on_splitted from ddtrace.appsec._iast._taint_tracking.aspects import _convert_escaped_text_to_tainted_text @@ -18,9 +19,6 @@ _SOURCE1 = Source(name="name", value="value", origin=OriginType.COOKIE) _SOURCE2 = Source(name="name2", value="value2", origin=OriginType.BODY) -_RANGE1 = TaintRange(0, 2, _SOURCE1) -_RANGE2 = TaintRange(1, 3, _SOURCE2) - def test_common_replace_untainted(): s = "foobar" @@ -50,9 +48,24 @@ def test_common_replace_untainted_wrong_args_method(): _ = common_replace("rjust", s, "10", 35) -def test_common_replace_tainted_str(): +def test_common_replace_tainted_str_multiple_initialize_native_state(): + initialize_native_state() context_id = _get_iast_context_id() + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) + print(f"context_id {context_id}") + s = "FooBar" + set_ranges(s, [_RANGE1, _RANGE2], context_id) + s2 = common_replace("lower", s) + assert s2 == "foobar" + assert get_ranges(s2) == [_RANGE1, _RANGE2] + +def test_common_replace_tainted_str(): + context_id = _get_iast_context_id() + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) + print(f"context_id {context_id}") s = "FooBar" set_ranges(s, [_RANGE1, _RANGE2], context_id) s2 = common_replace("lower", s) @@ -62,7 +75,8 @@ def test_common_replace_tainted_str(): def test_common_replace_tainted_bytes(): context_id = _get_iast_context_id() - + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) s = b"FooBar" set_ranges(s, [_RANGE1, _RANGE2], context_id) s2 = common_replace("lower", s) @@ -72,7 +86,8 @@ def test_common_replace_tainted_bytes(): def test_common_replace_tainted_bytearray(): context_id = _get_iast_context_id() - + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) s = b"FooBar" set_ranges(s, [_RANGE1, _RANGE2], context_id) s2 = common_replace("lower", s) diff --git a/tests/appsec/iast/conftest.py b/tests/appsec/iast/conftest.py index 21c423821a1..566c996c321 100644 --- a/tests/appsec/iast/conftest.py +++ b/tests/appsec/iast/conftest.py @@ -11,6 +11,7 @@ from ddtrace.appsec._iast._overhead_control_engine import oce from ddtrace.appsec._iast._patch_modules import _testing_unpatch_iast from ddtrace.appsec._iast._patches.json_tainting import patch as json_patch +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_free_slots_number from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size from ddtrace.appsec._iast.taint_sinks.code_injection import patch as code_injection_patch @@ -62,6 +63,7 @@ class MockSpan: ), override_env(env), ): + initialize_native_state() assert debug_context_array_size() == 2 assert debug_context_array_free_slots_number() > 0 span = MockSpan() diff --git a/tests/appsec/iast/fixtures/integration/main_configure.py b/tests/appsec/iast/fixtures/integration/main_configure.py index 5d8ff5c5467..b6c25d75afc 100644 --- a/tests/appsec/iast/fixtures/integration/main_configure.py +++ b/tests/appsec/iast/fixtures/integration/main_configure.py @@ -36,10 +36,7 @@ def main(): # Enabled by env var but then disabled with ``tracer.configure`` assert not asm_config._iast_enabled # Module was loaded before - if sys.version_info[:2] < (3, 14): - assert "ddtrace.appsec._iast.processor" in sys.modules - else: - assert "ddtrace.appsec._iast.processor" not in sys.modules + assert "ddtrace.appsec._iast.processor" in sys.modules # But processor is not used by the tracer for i in tracer._span_processors: assert i.__class__.__name__ != "AppSecIastSpanProcessor" diff --git a/tests/appsec/iast/taint_tracking/conftest.py b/tests/appsec/iast/taint_tracking/conftest.py index 918d09ec4df..db0dc87ba68 100644 --- a/tests/appsec/iast/taint_tracking/conftest.py +++ b/tests/appsec/iast/taint_tracking/conftest.py @@ -1,5 +1,6 @@ import pytest +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size from tests.appsec.iast.iast_utils import _end_iast_context_and_oce from tests.appsec.iast.iast_utils import _start_iast_context_and_oce @@ -11,6 +12,7 @@ def iast_request(): with override_global_config( dict(_iast_enabled=True, _iast_is_testing=True, _iast_deduplication_enabled=False, _iast_request_sampling=100.0) ): + initialize_native_state() assert debug_context_array_size() == 2 _start_iast_context_and_oce() try: diff --git a/tests/appsec/iast/taint_tracking/test_native_taint_range.py b/tests/appsec/iast/taint_tracking/test_native_taint_range.py index 4e3ddc621b2..e5aca37c2c7 100644 --- a/tests/appsec/iast/taint_tracking/test_native_taint_range.py +++ b/tests/appsec/iast/taint_tracking/test_native_taint_range.py @@ -64,9 +64,6 @@ def test_source_origin_refcount(): _SOURCE1 = Source(name="name", value="value", origin=OriginType.COOKIE) _SOURCE2 = Source(name="name2", value="value2", origin=OriginType.BODY) -_RANGE1 = TaintRange(0, 2, _SOURCE1) -_RANGE2 = TaintRange(1, 3, _SOURCE2) - def test_unicode_fast_tainting(): for i in range(5000): @@ -309,6 +306,8 @@ def test_collisions_bytearray(): def test_set_get_ranges_str(): + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) s1 = "abcde😁" s2 = "defg" assert taint_pyobject_with_ranges(s1, [_RANGE1, _RANGE2]) @@ -318,6 +317,8 @@ def test_set_get_ranges_str(): def test_set_get_ranges_other(): + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) s1 = 12345 s2 = None assert set_ranges(s1, [_RANGE1, _RANGE2], _get_iast_context_id()) is False @@ -328,6 +329,8 @@ def test_set_get_ranges_other(): def test_set_get_ranges_bytes(): + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) b1 = b"ABCDE" b2 = b"DEFG" assert taint_pyobject_with_ranges(b1, [_RANGE2, _RANGE1]) @@ -336,6 +339,8 @@ def test_set_get_ranges_bytes(): def test_set_get_ranges_bytearray(): + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) b1 = bytearray(b"abcdef") b2 = bytearray(b"abcdef") assert taint_pyobject_with_ranges(b1, [_RANGE1, _RANGE2]) @@ -423,6 +428,8 @@ def test_shift_taint_ranges(source, vulnerability_type): def test_are_all_text_all_ranges(): + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) s1 = "abc123" s2 = "ghijk" s3 = "xyzv" @@ -459,6 +466,8 @@ def test_are_all_text_all_ranges(): def test_get_range_by_hash(): + _RANGE1 = TaintRange(0, 2, _SOURCE1) + _RANGE2 = TaintRange(1, 3, _SOURCE2) hash_r1 = hash(_RANGE1) assert hash_r1 == _RANGE1.__hash__() hash_r2_call = hash(_RANGE2) diff --git a/tests/appsec/iast/test_fork_handler_regression.py b/tests/appsec/iast/test_fork_handler_regression.py index dbd6942f4bc..fd82610ed57 100644 --- a/tests/appsec/iast/test_fork_handler_regression.py +++ b/tests/appsec/iast/test_fork_handler_regression.py @@ -31,6 +31,7 @@ def test_fork_handler_callable(iast_context_defaults): """Verify that _reset_iast_after_fork is callable and disables IAST.""" from ddtrace.appsec._iast import _disable_iast_after_fork + from ddtrace.appsec._iast._taint_tracking import reset_native_state from ddtrace.internal.settings.asm import config as asm_config # Should not raise any exception @@ -39,8 +40,9 @@ def test_fork_handler_callable(iast_context_defaults): _disable_iast_after_fork() # Fork handler should disable IAST assert asm_config._iast_enabled is False, "IAST should be disabled after fork" - # Restore for other tests + # Restore for other tests - reinitialize native state for clean slate asm_config._iast_enabled = original_state + reset_native_state() # Reinitialize to clean state for next test except Exception as e: pytest.fail(f"Fork handler raised unexpected exception: {e}") @@ -49,6 +51,7 @@ def test_fork_handler_with_active_context(iast_context_defaults): """Verify fork handler disables IAST and clears context when active.""" from ddtrace.appsec._iast import _disable_iast_after_fork from ddtrace.appsec._iast._taint_tracking import is_tainted + from ddtrace.appsec._iast._taint_tracking import reset_native_state from ddtrace.internal.settings.asm import config as asm_config _start_iast_context_and_oce() @@ -67,8 +70,9 @@ def test_fork_handler_with_active_context(iast_context_defaults): # After reset, we should be able to call these safely (they're no-ops now) _end_iast_context_and_oce() - # Restore for other tests + # Restore for other tests - reinitialize native state for clean slate asm_config._iast_enabled = original_state + reset_native_state() # Reinitialize to clean state for next test @pytest.mark.skip(reason="multiprocessing fork doesn't work correctly in ddtrace-py 4.0") @@ -80,6 +84,12 @@ def test_multiprocessing_with_iast_no_segfault(iast_context_defaults): The fork handler should detect the active state and disable IAST in the child to prevent segmentation faults from corrupted native extension state. """ + import multiprocessing + + # Python 3.14+ defaults to 'forkserver' which requires pickling. + # Force 'fork' method for this test since we use local functions. + original_start_method = multiprocessing.get_start_method(allow_none=True) + multiprocessing.set_start_method("fork", force=True) def child_process_work(queue): """Child process where IAST should be disabled.""" @@ -127,6 +137,10 @@ def child_process_work(queue): assert result[2] == 0, "Child should have 0 tainted objects (IAST disabled)" assert result[3] is False, "Objects should not be tainted in child (IAST disabled)" + # Restore original start method + if original_start_method: + multiprocessing.set_start_method(original_start_method, force=True) + @pytest.mark.skip(reason="multiprocessing fork doesn't work correctly in ddtrace-py 4.0") def test_multiple_fork_operations(iast_context_defaults): @@ -136,6 +150,12 @@ def test_multiple_fork_operations(iast_context_defaults): Each child process should have IAST safely disabled by the fork handler, ensuring no crashes occur even with multiple forks. """ + import multiprocessing + + # Python 3.14+ defaults to 'forkserver' which requires pickling. + # Force 'fork' method for this test since we use local functions. + original_start_method = multiprocessing.get_start_method(allow_none=True) + multiprocessing.set_start_method("fork", force=True) def simple_child_work(queue, child_id): """Simple child process work - IAST will be disabled.""" @@ -178,6 +198,10 @@ def simple_child_work(queue, child_id): assert result[0] == "success", f"Child failed: {result}" assert result[2] is False, f"IAST should be disabled in child {result[1]}" + # Restore original start method + if original_start_method: + multiprocessing.set_start_method(original_start_method, force=True) + def test_fork_with_os_fork_no_segfault(iast_context_defaults): """ @@ -227,6 +251,8 @@ def test_fork_with_os_fork_no_segfault(iast_context_defaults): _, status = os.waitpid(pid, 0) exit_code = os.WEXITSTATUS(status) assert exit_code == 0, f"Child process failed with exit code {exit_code}" + # Clean up the manually started context to avoid leaving context slots occupied + _end_iast_context_and_oce() def test_fork_handler_clears_state(iast_context_defaults): @@ -275,6 +301,12 @@ def test_eval_in_forked_process(iast_context_defaults): With IAST disabled in the child, eval() should work normally without any instrumentation or tainting. """ + import multiprocessing + + # Python 3.14+ defaults to 'forkserver' which requires pickling. + # Force 'fork' method for this test since we use local functions. + original_start_method = multiprocessing.get_start_method(allow_none=True) + multiprocessing.set_start_method("fork", force=True) def child_eval_work(queue): """Child process with IAST disabled.""" @@ -316,6 +348,10 @@ def child_eval_work(queue): assert result[2] is False, "IAST should be disabled in child" assert result[3] is False, "Code should not be tainted (IAST disabled)" + # Restore original start method + if original_start_method: + multiprocessing.set_start_method(original_start_method, force=True) + def test_early_fork_keeps_iast_enabled(): """ @@ -326,12 +362,13 @@ def test_early_fork_keeps_iast_enabled(): remain enabled in the child and work normally. """ from ddtrace.appsec._iast import _disable_iast_after_fork + from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking import is_tainted from ddtrace.internal.settings.asm import config as asm_config # Ensure IAST is enabled but NO context is active (simulating early fork) # Don't call _start_iast_context_and_oce() - this simulates pre-fork state - + initialize_native_state() original_state = asm_config._iast_enabled asm_config._iast_enabled = True diff --git a/tests/appsec/iast/test_safe_wrappers_and_initialization.py b/tests/appsec/iast/test_safe_wrappers_and_initialization.py new file mode 100644 index 00000000000..5121408de61 --- /dev/null +++ b/tests/appsec/iast/test_safe_wrappers_and_initialization.py @@ -0,0 +1,264 @@ +""" +Unit tests for safe wrapper functions and native state initialization. + +These tests verify that: +1. Safe wrapper functions handle uninitialized state gracefully +2. initialize_native_state() properly initializes the native module +3. reset_native_state() properly cleans up state +4. No crashes occur when calling IAST functions before initialization +""" + +import subprocess +import sys + +import pytest + + +@pytest.mark.skip_iast_check_logs +class TestUninitializedStateHandling: + """Test that IAST functions handle uninitialized state gracefully.""" + + def test_context_functions_before_init_return_safe_defaults(self): + """Test that context functions return safe defaults before initialization. + + This test runs in a subprocess to ensure complete isolation from other + tests that may have already initialized the native state. + """ + # Run in subprocess for complete isolation + code = """ +import sys +# Import without calling initialize_native_state() +from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_free_slots_number +from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size +from ddtrace.appsec._iast._taint_tracking._context import debug_num_tainted_objects +from ddtrace.appsec._iast._taint_tracking._context import finish_request_context +from ddtrace.appsec._iast._taint_tracking._context import start_request_context + +# These should all return safe defaults without crashing +size = debug_context_array_size() +assert size == 0, f"Should return 0 before initialization, got {size}" + +free_slots = debug_context_array_free_slots_number() +assert free_slots == 0, f"Should return 0 before initialization, got {free_slots}" + +ctx = start_request_context() +assert ctx is None, f"Should return None before initialization, got {ctx}" + +# This should not crash +finish_request_context(0) + +num = debug_num_tainted_objects(0) +assert num == 0, f"Should return 0 before initialization, got {num}" + +sys.exit(0) +""" + result = subprocess.run([sys.executable, "-c", code], capture_output=True, text=True) + if result.returncode != 0: + pytest.fail(f"Subprocess test failed:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}") + + def test_taint_functions_before_init_do_not_crash(self): + """Test that taint functions don't crash before initialization.""" + from ddtrace.appsec._iast._taint_tracking._taint_objects_base import is_pyobject_tainted + + test_str = "test string" + # This should not crash, should return False + result = is_pyobject_tainted(test_str) + assert result is False, "Should return False before initialization" + + +class TestNativeStateInitialization: + """Test the initialize_native_state() function.""" + + def test_initialize_native_state_creates_globals(self): + """Test that initialize_native_state() properly initializes globals.""" + from ddtrace.appsec._iast._taint_tracking import initialize_native_state + from ddtrace.appsec._iast._taint_tracking import reset_native_state + + # Reset to clean state + reset_native_state() + + # Initialize + initialize_native_state() + + # Verify context functions now work + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_free_slots_number + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size + from ddtrace.appsec._iast._taint_tracking._context import start_request_context + + size = debug_context_array_size() + assert size > 0, "Should return actual size after initialization" + + free_slots = debug_context_array_free_slots_number() + assert free_slots > 0, "Should return actual free slots after initialization" + + ctx = start_request_context() + assert ctx is not None, "Should return valid context ID after initialization" + + def test_taint_operations_work_after_initialization(self): + """Test that taint operations work correctly after initialization.""" + from ddtrace.appsec._iast._iast_request_context_base import IAST_CONTEXT + from ddtrace.appsec._iast._overhead_control_engine import oce + from ddtrace.appsec._iast._taint_tracking import initialize_native_state + from ddtrace.appsec._iast._taint_tracking import reset_native_state + from ddtrace.appsec._iast._taint_tracking._context import start_request_context + from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject + from ddtrace.appsec._iast._taint_tracking._taint_objects_base import is_pyobject_tainted + from ddtrace.internal.settings.asm import config as asm_config + from tests.utils import override_env + + # Reset and initialize with proper configuration + reset_native_state() + + # Configure IAST and overhead control engine + with override_env({"DD_IAST_REQUEST_SAMPLING": "100", "DD_IAST_MAX_CONCURRENT_REQUEST": "100"}): + asm_config._iast_enabled = True + oce.reconfigure() + initialize_native_state() + + # Create a request context and set it in the ContextVar + ctx_id = start_request_context() + assert ctx_id is not None, "Should return valid context ID after initialization" + IAST_CONTEXT.set(ctx_id) + + # Test tainting + tainted_str = taint_pyobject("sensitive data", source_name="test_source", source_value="test_value") + assert tainted_str is not None + + result = is_pyobject_tainted(tainted_str) + assert result is True, "String should be tainted after tainting operation" + + +class TestResetNativeState: + """Test the reset_native_state() function.""" + + def test_reset_native_state_clears_globals(self): + """Test that reset_native_state() properly cleans up and re-initializes.""" + from ddtrace.appsec._iast._taint_tracking import initialize_native_state + from ddtrace.appsec._iast._taint_tracking import reset_native_state + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_free_slots_number + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size + + # Initialize + initialize_native_state() + + size_before = debug_context_array_size() + assert size_before > 0, "Should have size after initialization" + + # Reset (which re-initializes) + reset_native_state() + + # Verify state is fresh (re-initialized, not nullptr) + size_after = debug_context_array_size() + assert size_after > 0, "Should have size after reset (which re-initializes)" + + free_slots_after = debug_context_array_free_slots_number() + assert free_slots_after > 0, "Should have free slots after reset (which re-initializes)" + + +class TestForkSafety: + """Test that fork safety mechanisms work correctly.""" + + def test_state_is_reset_in_child_process_after_fork(self): + """Test that child process has clean state after fork.""" + import os + + from ddtrace.appsec._iast._taint_tracking import initialize_native_state + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_free_slots_number + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size + from ddtrace.appsec._iast._taint_tracking._context import start_request_context + from ddtrace.appsec._iast._taint_tracking._taint_objects import taint_pyobject + from ddtrace.internal.settings.asm import config as asm_config + + # Enable IAST + asm_config._iast_enabled = True + + # Initialize and create tainted objects in parent + initialize_native_state() + ctx_id = start_request_context() + assert ctx_id is not None + + # Create some tainted objects in parent + for i in range(10): + test_str = f"tainted_string_{i}" + try: + taint_pyobject(test_str, source_name=f"source_{i}", source_value=f"value_{i}") + except Exception: + pass # Some tainting might fail, that's ok for this test + + # Fork + pid = os.fork() + + if pid == 0: + # Child process + try: + # Verify state was reset in child + child_size = debug_context_array_size() + child_free = debug_context_array_free_slots_number() + + # With pthread_atfork fix, child should have all slots free + if child_free == child_size and child_size > 0: + os._exit(0) # Success + else: + os._exit(1) # Failure - state not reset + except Exception: + os._exit(2) # Exception + else: + # Parent process + _, status = os.waitpid(pid, 0) + + if os.WIFSIGNALED(status): + signal_num = os.WTERMSIG(status) + pytest.fail(f"Child crashed with signal {signal_num}") + elif os.WIFEXITED(status): + exit_code = os.WEXITSTATUS(status) + if exit_code == 0: + # Test passed + pass + elif exit_code == 1: + pytest.fail("State was not reset in child process") + else: + pytest.fail(f"Child process error: {exit_code}") + + +class TestSafeWrappersPreventCrashes: + """Test that safe wrapper functions prevent crashes in error scenarios.""" + + def test_multiple_initializations_do_not_crash(self): + """Test that calling initialize multiple times doesn't crash.""" + from ddtrace.appsec._iast._taint_tracking._native import initialize_native_state + + # This should not crash + initialize_native_state() + initialize_native_state() + initialize_native_state() + + def test_multiple_resets_do_not_crash(self): + """Test that calling reset multiple times doesn't crash.""" + from ddtrace.appsec._iast._taint_tracking._native import initialize_native_state + from ddtrace.appsec._iast._taint_tracking._native import reset_native_state + + initialize_native_state() + reset_native_state() + reset_native_state() # Should not crash + + def test_operations_after_reset_return_safe_values(self): + """Test that operations after reset work correctly (reset re-initializes).""" + from ddtrace.appsec._iast._taint_tracking import initialize_native_state + from ddtrace.appsec._iast._taint_tracking import reset_native_state + from ddtrace.appsec._iast._taint_tracking._context import debug_context_array_size + from ddtrace.appsec._iast._taint_tracking._context import start_request_context + from ddtrace.appsec._iast._taint_tracking._taint_objects_base import is_pyobject_tainted + + # Initialize, then reset (which re-initializes) + initialize_native_state() + reset_native_state() + + # Operations should work (because reset re-initializes) + size = debug_context_array_size() + assert size > 0, "Should have size after reset (which re-initializes)" + + ctx = start_request_context() + assert ctx is not None, "Should be able to create context after reset" + + result = is_pyobject_tainted("test") + assert result is False, "Untainted string should return False" diff --git a/tests/appsec/iast_memcheck/test_iast_mem_check.py b/tests/appsec/iast_memcheck/test_iast_mem_check.py index fd5a3cb0595..c80b7d9466d 100644 --- a/tests/appsec/iast_memcheck/test_iast_mem_check.py +++ b/tests/appsec/iast_memcheck/test_iast_mem_check.py @@ -83,12 +83,11 @@ def test_propagation_memory_check(origin1, origin2, iast_context_defaults): assert len(span_report.vulnerabilities) > 0 assert len(get_tainted_ranges(result)) == 1 - if _num_objects_tainted == 0: - _num_objects_tainted = _num_objects_tainted_in_request() - assert _num_objects_tainted > 0 - if _debug_context_array_size == 0: - _debug_context_array_size = debug_context_array_size() - assert _debug_context_array_size > 0 + _num_objects_tainted = _num_objects_tainted_in_request() + assert _num_objects_tainted > 0 + + _debug_context_array_size = debug_context_array_size() + assert _debug_context_array_size > 0 # Some tainted pyobject is freed, and Python may reuse the memory address # hence the number of tainted objects may be the same or less @@ -180,3 +179,108 @@ def test_stacktrace_memory_check_direct_call(): assert line_number > 0 assert method == "test_stacktrace_memory_check_direct_call" assert not class_ + + +@pytest.mark.limit_leaks("2.0 KB", filter_fn=IASTFilter()) +@pytest.mark.parametrize( + "input_str, start, stop, step", + [ + ("abcdefghijklmnopqrstuvwxyz", 0, 10, 1), + ("abcdefghijklmnopqrstuvwxyz", 5, 15, 1), + ("abcdefghijklmnopqrstuvwxyz", 0, 20, 2), + ("abcdefghijklmnopqrstuvwxyz", 1, -1, 1), + (b"abcdefghijklmnopqrstuvwxyz", 0, 10, 1), + (b"abcdefghijklmnopqrstuvwxyz", 5, 15, 1), + (bytearray(b"abcdefghijklmnopqrstuvwxyz"), 0, 10, 1), + (bytearray(b"abcdefghijklmnopqrstuvwxyz"), 5, 15, 1), + ], +) +def test_slice_memory_check(input_str, start, stop, step, iast_context_defaults): + """Test that slice_aspect doesn't leak memory. + + This test verifies the fix for the slice aspect memory issue where + the old implementation created O(n) intermediate data structures. + The new optimized version should use O(m) memory where m = number of taint ranges. + """ + from ddtrace.appsec._iast._taint_tracking.aspects import slice_aspect + + _num_objects_tainted = 0 + _debug_context_array_size = 0 + _iast_finish_request() + + for iteration in range(LOOPS): + _iast_start_request() + + # Taint the input string + tainted_string = taint_pyobject( + input_str, source_name="test_input", source_value=input_str, source_origin=OriginType.PARAMETER + ) + + # Perform slice operation + result = slice_aspect(tainted_string, start, stop, step) + + # Verify the result is properly tainted + tainted_ranges = get_tainted_ranges(result) + assert len(tainted_ranges) >= 0 # May be 0 if slice doesn't overlap with tainted range + + # Track memory metrics + _num_objects_tainted = _num_objects_tainted_in_request() + assert _num_objects_tainted > 0 + + _debug_context_array_size = debug_context_array_size() + assert _debug_context_array_size > 0 + + # Verify no memory leak - context array size should remain stable + assert _debug_context_array_size == debug_context_array_size() + + _iast_finish_request() + + +@pytest.mark.limit_leaks("2.0 KB", filter_fn=IASTFilter()) +def test_slice_memory_check_repeated_operations(iast_context_defaults): + """Test that repeated slice operations don't accumulate memory. + + This simulates the benchmark scenario where slice_aspect is called + many times on the same or similar strings. The old implementation + would create O(n) intermediate arrays each time, causing memory to + accumulate. The new implementation should remain stable. + """ + from ddtrace.appsec._iast._taint_tracking.aspects import slice_aspect + + _iast_finish_request() + _iast_start_request() + + # Taint a test string + test_string = "abcdefghijklmnopqrstuvwxyz0123456789" + tainted_string = taint_pyobject( + test_string, source_name="test_input", source_value=test_string, source_origin=OriginType.PARAMETER + ) + + # Get baseline + initial_context_size = debug_context_array_size() + assert initial_context_size > 0 + + # Perform many slice operations (simulating benchmark workload) + for i in range(100): + # Various slice operations + result1 = slice_aspect(tainted_string, 0, 10, 1) + result2 = slice_aspect(tainted_string, 5, 15, 1) + result3 = slice_aspect(tainted_string, 10, 20, 2) + result4 = slice_aspect(tainted_string, 1, -1, 1) + + # Verify results are tainted + assert len(get_tainted_ranges(result1)) > 0 + assert len(get_tainted_ranges(result2)) > 0 + assert len(get_tainted_ranges(result3)) > 0 + assert len(get_tainted_ranges(result4)) > 0 + + # Context array size should not have grown significantly + final_context_size = debug_context_array_size() + + # Allow small variation but no significant growth + # With the old buggy code, this would grow proportionally to iterations + assert final_context_size <= initial_context_size + 10, ( + f"Context size grew from {initial_context_size} to {final_context_size}" + ) + + _iast_finish_request() diff --git a/tests/appsec/integrations/django_tests/django_app/views.py b/tests/appsec/integrations/django_tests/django_app/views.py index 2ecbf4c3b70..4fd147a2f94 100644 --- a/tests/appsec/integrations/django_tests/django_app/views.py +++ b/tests/appsec/integrations/django_tests/django_app/views.py @@ -504,7 +504,7 @@ def vulnerable_request_downstream(request): port = request.GET.get("port", "8050") http_poolmanager = urllib3.PoolManager(num_pools=1) # Sending a GET request and getting back response as HTTPResponse object. - response = http_poolmanager.request("GET", f"http://localhost:{port}/appsec/returnheaders") + response = http_poolmanager.request("GET", f"http://localhost:{port}/appsec/returnheaders/") http_poolmanager.clear() return HttpResponse(response.data, status=200, content_type="application/json") diff --git a/tests/appsec/integrations/django_tests/test_iast_django_testagent.py b/tests/appsec/integrations/django_tests/test_iast_django_testagent.py index 51a8518667d..5d3b415f674 100644 --- a/tests/appsec/integrations/django_tests/test_iast_django_testagent.py +++ b/tests/appsec/integrations/django_tests/test_iast_django_testagent.py @@ -229,33 +229,7 @@ def test_iast_vulnerable_request_downstream_django(server, config, iast_test_tok cfg_env.update(env) config = dict(config) config["env"] = cfg_env - # TODO(APPSEC-59081): without use_ddtrace_cmd=False it raises a `NameError: name 'sys' is not defined` - # File "/proj/dd-trace-py/ddtrace/internal/module.py", line 301, in _exec_module - # self.loader.exec_module(module) - # File "", line 883, in exec_module - # File "", line 241, in _call_with_frames_removed - # File "/proj/dd-trace-py/tests/appsec/integrations/django_tests/django_app/wsgi.py", line 13, in - # application = get_wsgi_application() - # File "/proj/venv310/site-packages/django/core/wsgi.py", line 12, in get_wsgi_application - # django.setup(set_prefix=False) - # File "/proj/venv310/site-packages/django/__init__.py", line 24, in setup - # apps.populate(settings.INSTALLED_APPS) - # File "/proj/dd-trace-py/ddtrace/contrib/internal/trace_utils.py", line 315, in wrapper - # return func(mod, pin, wrapped, instance, args, kwargs) - # File "/proj/dd-trace-py/ddtrace/contrib/internal/django/patch.py", line 124, in traced_populate - # ret = func(*args, **kwargs) - # File "/proj/venv310/site-packages/django/apps/registry.py", line 91, in populate - # app_config = AppConfig.create(entry) - # File "/proj/venv310/site-packages/django/apps/config.py", line 121, in create - # if module_has_submodule(app_module, APPS_MODULE_NAME): - # File "/proj/venv310/site-packages/django/utils/module_loading.py", line 85, in module_has_submodule - # return importlib_find(full_module_name, package_path) is not None - # File "/python310importlib/util.py", line 103, in find_spec - # return _find_spec(fullname, parent_path) - # File "/python310importlib/_bootstrap.py", line 923, in _find_spec - # meta_path = sys.meta_path - # NameError: name 'sys' is not defined - with server(use_ddtrace_cmd=False, iast_enabled="true", token=iast_test_token, port=8050, **config) as context: + with server(use_ddtrace_cmd=True, iast_enabled="true", token=iast_test_token, port=8050, **config) as context: _, django_client, pid = context trace_id = 1212121212121212121 diff --git a/tests/appsec/integrations/fastapi_tests/test_fastapi_appsec_iast.py b/tests/appsec/integrations/fastapi_tests/test_fastapi_appsec_iast.py index 9c72fed6b00..38d813279dc 100644 --- a/tests/appsec/integrations/fastapi_tests/test_fastapi_appsec_iast.py +++ b/tests/appsec/integrations/fastapi_tests/test_fastapi_appsec_iast.py @@ -18,6 +18,7 @@ from ddtrace.appsec._constants import IAST from ddtrace.appsec._iast._handlers import _on_iast_fastapi_patch from ddtrace.appsec._iast._overhead_control_engine import oce +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking import origin_to_str from ddtrace.appsec._iast._taint_tracking._taint_objects_base import get_tainted_ranges from ddtrace.appsec._iast.constants import VULN_INSECURE_COOKIE @@ -74,6 +75,7 @@ def _restore_request_validations(original_validate): def _aux_appsec_prepare_tracer(tracer): + initialize_native_state() _on_iast_fastapi_patch() patch_fastapi() patch_sqlite_sqli() diff --git a/tests/appsec/integrations/fastapi_tests/test_iast_mcp_testagent.py b/tests/appsec/integrations/fastapi_tests/test_iast_mcp_testagent.py index 6a25bb4a38b..380c961d032 100644 --- a/tests/appsec/integrations/fastapi_tests/test_iast_mcp_testagent.py +++ b/tests/appsec/integrations/fastapi_tests/test_iast_mcp_testagent.py @@ -8,6 +8,7 @@ """ from contextlib import asynccontextmanager +import json import sys import pytest @@ -564,12 +565,21 @@ async def test_iast_mcp_sse_streaming_multiple_calls(iast_test_token): # Call multiple tools in sequence result1 = await session.call_tool("calculator", {"operation": "add", "a": 10, "b": 20}) assert not result1.isError + assert result1.content + assert len(result1.content) > 0 + assert json.loads(result1.content[0].text)["result"] == 30 result2 = await session.call_tool("get_weather", {"location": "San Francisco"}) assert not result2.isError + assert len(result2.content) > 0 + assert result2.content[0].text == "Weather in San Francisco is 72°F" result3 = await session.call_tool("calculator", {"operation": "multiply", "a": 5, "b": 8}) assert not result3.isError + assert not result3.isError + assert result3.content + assert len(result3.content) > 0 + assert json.loads(result3.content[0].text)["result"] == 40 # Finally call the vulnerable one result4 = await session.call_tool("execute_command", {"command": "multi_stream_test.txt"}) @@ -584,3 +594,83 @@ async def test_iast_mcp_sse_streaming_multiple_calls(iast_test_token): # Should have spans with IAST from HTTP requests assert len(spans_with_iast) >= 1 + + +@pytest.mark.asyncio +async def test_iast_mcp_sse_streaming_stress_test_hundreds(iast_test_token): + """Stress test: Call the same endpoint hundreds of times via HTTP/SSE streaming. + + This test repeatedly calls the same MCP tool endpoint hundreds of times to detect: + - Memory leaks + - Context corruption with repeated operations + - Segfaults that only occur after many iterations + - IAST taint tracking issues with high-volume requests + + This is critical for production readiness where systems handle high throughput. + """ + NUM_ITERATIONS = 100 + + with uvicorn_server( + python_cmd=sys.executable, + iast_enabled="true", + token=iast_test_token, + port=8053, + app="tests.appsec.integrations.fastapi_tests.mcp_app:app", + env=MCP_IAST_ENV, + ) as context: + _, fastapi_client, pid = context + + headers = { + "User-Agent": "DatadogIAST/1.0 (MCP-SSE-Test; Stress-Test-Hundreds)", + "Referer": "https://test.datadoghq.com/mcp-sse-stress", + "Accept": "text/event-stream", + "X-Test-Session": "mcp-sse-stress-test", + } + + # Ensure server is ready + response = fastapi_client.get("/", headers=headers) + assert response.status_code == 200 + + # Make hundreds of calls to the same endpoint via HTTP/SSE streaming + async with mcp_client_session(8053) as session: + # Initialize + await session.initialize() + + # Track errors and successful calls + successful_calls = 0 + errors = [] + + # Call the same tool hundreds of times + for i in range(NUM_ITERATIONS): + try: + result = await session.call_tool("calculator", {"operation": "add", "a": i, "b": i * 2}) + assert result is not None, f"Result is None at iteration {i}" + assert not result.isError, f"Tool returned error at iteration {i}: {result}" + assert result.content, f"No content in result at iteration {i}" + assert len(result.content) > 0, f"Empty content at iteration {i}" + + expected_result = i + (i * 2) + actual_result = json.loads(result.content[0].text)["result"] + assert actual_result == expected_result, ( + f"Wrong result at iteration {i}: expected {expected_result}, got {actual_result}" + ) + + successful_calls += 1 + except Exception as e: + errors.append((i, str(e))) + + # Assert that most calls succeeded (allow for some transient failures) + assert successful_calls >= NUM_ITERATIONS * 0.95, ( + f"Too many failures: {len(errors)}/{NUM_ITERATIONS}. Errors: {errors[:10]}" + ) + assert len(errors) == 0, f"Errors occurred during stress test: {errors}" + + response_tracer = _get_span(iast_test_token) + spans_with_iast = [] + for trace in response_tracer: + for span in trace: + if span.get("metrics", {}).get("_dd.iast.enabled") == 1.0: + spans_with_iast.append(span) + + # Should have spans with IAST from HTTP requests + assert len(spans_with_iast) >= 1 diff --git a/tests/appsec/integrations/flask_tests/test_iast_flask.py b/tests/appsec/integrations/flask_tests/test_iast_flask.py index 764cf46bed3..3e93f43cbcb 100644 --- a/tests/appsec/integrations/flask_tests/test_iast_flask.py +++ b/tests/appsec/integrations/flask_tests/test_iast_flask.py @@ -9,6 +9,7 @@ from ddtrace.appsec._iast._iast_request_context_base import _iast_start_request from ddtrace.appsec._iast._overhead_control_engine import oce from ddtrace.appsec._iast._patches.json_tainting import patch as patch_json +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast._taint_tracking._taint_objects_base import is_pyobject_tainted from ddtrace.appsec._iast.constants import VULN_INSECURE_COOKIE from ddtrace.appsec._iast.constants import VULN_NO_HTTPONLY_COOKIE @@ -48,6 +49,7 @@ def setUp(self): _iast_request_sampling=100.0, ) ): + initialize_native_state() patch_sqlite_sqli() patch_insecure_cookie() patch_header_injection() diff --git a/tests/appsec/integrations/packages_tests/conftest.py b/tests/appsec/integrations/packages_tests/conftest.py index 9de3761416b..26cd6bae29b 100644 --- a/tests/appsec/integrations/packages_tests/conftest.py +++ b/tests/appsec/integrations/packages_tests/conftest.py @@ -1,5 +1,6 @@ import pytest +from ddtrace.appsec._iast._taint_tracking import initialize_native_state from ddtrace.appsec._iast.taint_sinks.code_injection import patch as code_injection_patch from ddtrace.contrib.internal.psycopg.patch import patch as psycopg_patch from ddtrace.contrib.internal.psycopg.patch import unpatch as psycopg_unpatch @@ -17,6 +18,7 @@ def iast_create_context(): with override_global_config( dict(_iast_enabled=True, _iast_deduplication_enabled=False, _iast_request_sampling=100.0) ): + initialize_native_state() sqlalchemy_patch() psycopg_patch() sqli_sqlite_patch()