diff --git a/CMakeLists.txt b/CMakeLists.txt index f29eae90b3..e9c5c0661c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,12 @@ if (MSVC) add_compile_options($<$:/Zc:preprocessor>) add_compile_options($<$:/Zc:__cplusplus>) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/Zc:preprocessor -Xcompiler=/Zc:__cplusplus") + # nvcc's front-end has a known bug (fixed in NVCC 13.1) where the fmt 11.x + # literal-encoding probe (fmt/base.h:is_utf8_enabled) always returns false, + # causing a static_assert failure. Disable fmt's Unicode check project-wide; + # without /utf-8 the MSVC host compiler also sees use_utf8=false, so all TUs + # agree and there is no ODR inconsistency. + add_compile_definitions(FMT_UNICODE=0) endif () find_package(CUDAToolkit REQUIRED) @@ -39,6 +45,19 @@ option(BUILD_FAST_MATH "Build in fast math mode" ON) include(FetchContent) +FetchContent_Declare( + fmt + GIT_REPOSITORY https://github.com/fmtlib/fmt.git + GIT_TAG 11.1.4 + GIT_SHALLOW ON + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE + EXCLUDE_FROM_ALL +) +FetchContent_MakeAvailable(fmt) +set_target_properties(fmt PROPERTIES POSITION_INDEPENDENT_CODE ON) +include_directories(${fmt_SOURCE_DIR}/include) + if (BUILD_TEST) FetchContent_Declare( Catch2 @@ -269,6 +288,11 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --std=c++${CXX_STD}") +# Suppress nvcc warning 128 (loop not reachable) in fmt headers - nvcc false positive +# Suppress nvcc warning 27 (character value out of range) from fmt/format.h's +# fractional_part_rounding_thresholds which uses U32 string literals with values +# exceeding 0x10FFFF - harmless, pending fix in fmtlib (PR #4719) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --diag-suppress=128 --diag-suppress=27") string(REPLACE "-O2" "" CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE}") 
string(REPLACE "-O2" "" CMAKE_CUDA_FLAGS_RELEASE "${CMAKE_CUDA_FLAGS_RELEASE}") diff --git a/src/turbomind/comm/CMakeLists.txt b/src/turbomind/comm/CMakeLists.txt index 07d4c91fd7..c9a4e5873f 100644 --- a/src/turbomind/comm/CMakeLists.txt +++ b/src/turbomind/comm/CMakeLists.txt @@ -5,11 +5,11 @@ cmake_minimum_required(VERSION 3.11) find_package(Threads) add_library(host_comm STATIC host_comm.cc thread_comm.cc) -target_link_libraries(host_comm PRIVATE core logger Threads::Threads) +target_link_libraries(host_comm PRIVATE core Threads::Threads) set_property(TARGET host_comm PROPERTY POSITION_INDEPENDENT_CODE ON) add_library(device_comm STATIC device_comm.cc) -target_link_libraries(device_comm PRIVATE core logger) +target_link_libraries(device_comm PRIVATE core) set_property(TARGET device_comm PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET device_comm PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) diff --git a/src/turbomind/comm/cuda_ipc/CMakeLists.txt b/src/turbomind/comm/cuda_ipc/CMakeLists.txt index 606f207143..d513102268 100644 --- a/src/turbomind/comm/cuda_ipc/CMakeLists.txt +++ b/src/turbomind/comm/cuda_ipc/CMakeLists.txt @@ -16,7 +16,7 @@ target_link_libraries(cuda_ipc_comm PRIVATE core cuda_utils CUDA::cuda_driver - logger) + ) set_property(TARGET cuda_ipc_comm PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET cuda_ipc_comm PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) diff --git a/src/turbomind/comm/cuda_ipc/cuda_ipc_comm.cu b/src/turbomind/comm/cuda_ipc/cuda_ipc_comm.cu index eb3bbd36a3..c4ad4bbe98 100644 --- a/src/turbomind/comm/cuda_ipc/cuda_ipc_comm.cu +++ b/src/turbomind/comm/cuda_ipc/cuda_ipc_comm.cu @@ -17,8 +17,8 @@ #include "src/turbomind/comm/cuda_ipc/semaphore.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" namespace turbomind::comm { @@ -156,7 +156,7 @@ CudaIpcCommImpl::~CudaIpcCommImpl() } for (const auto& a : allocation_) { - TM_LOG_WARNING("[COMM][%d] 
Allocation (%p, %lu) is not freed", global_rank_, a.uc_beg, a.size); + TM_LOG_WARN("Rank {}: Allocation ({}, {}) is not freed", global_rank_, a.uc_beg, a.size); } cudaStreamSynchronize(0); @@ -220,7 +220,7 @@ void CudaIpcCommImpl::Free(void* ptr) allocation_.erase(it); } else { - TM_LOG_WARNING("[TM][COMM][%d] Freeing %p which is not allocated by this module", global_rank_, ptr); + TM_LOG_WARN("Rank {}: Freeing {} which is not allocated by this module", global_rank_, ptr); } } @@ -230,7 +230,7 @@ void CudaIpcCommImpl::Register(void* ptr, size_t size) auto& symm = groups_.at(0).symmetric; if (symm.find(ptr) != symm.end()) { - TM_LOG_WARNING("[TM][COMM][%d] Duplicated registration on (%p, %lu)", global_rank_, ptr, size); + TM_LOG_WARN("Rank {}: Duplicated registration on ({}, {})", global_rank_, ptr, size); return; } @@ -317,7 +317,7 @@ void CudaIpcCommImpl::Deregister(void* ptr) Deregister(s.extract(it).value()); } else { - TM_LOG_WARNING("[TM][COMM][%d] Deregistering non-registered address %p", global_rank_, ptr); + TM_LOG_WARN("Rank {}: Deregistering non-registered address {}", global_rank_, ptr); } } } diff --git a/src/turbomind/comm/env.h b/src/turbomind/comm/env.h index a8a475e195..bead5876b3 100644 --- a/src/turbomind/comm/env.h +++ b/src/turbomind/comm/env.h @@ -7,7 +7,7 @@ #include #include -#include "src/turbomind/utils/logger.h" +#include "src/turbomind/core/logger.h" namespace turbomind { @@ -40,7 +40,7 @@ auto GetEnv() if (is_set) { std::stringstream ss; ss << x; - TM_LOG_INFO("[%s] %s=%s", E::prefix, E::name, ss.str().c_str()); + TM_LOG_INFO("{} {}={}", E::prefix, E::name, ss.str()); } return x; }(); diff --git a/src/turbomind/comm/gloo/CMakeLists.txt b/src/turbomind/comm/gloo/CMakeLists.txt index 395b7ee55a..141c6ff948 100644 --- a/src/turbomind/comm/gloo/CMakeLists.txt +++ b/src/turbomind/comm/gloo/CMakeLists.txt @@ -33,7 +33,7 @@ add_library(gloo_comm STATIC tcp_store.cc ) set_property(TARGET gloo_comm PROPERTY POSITION_INDEPENDENT_CODE ON) 
-target_link_libraries(gloo_comm PUBLIC gloo host_comm logger xgrammar) +target_link_libraries(gloo_comm PUBLIC gloo host_comm xgrammar) add_executable(test_ipc_comm test_ipc_comm.cc) target_link_libraries(test_ipc_comm PRIVATE gloo_comm Threads::Threads) diff --git a/src/turbomind/comm/gloo/gloo_comm.cc b/src/turbomind/comm/gloo/gloo_comm.cc index 10bb4a7974..6e664613f2 100644 --- a/src/turbomind/comm/gloo/gloo_comm.cc +++ b/src/turbomind/comm/gloo/gloo_comm.cc @@ -23,7 +23,7 @@ #include "src/turbomind/comm/gloo/tcp_store.h" #include "src/turbomind/comm/host_comm.h" -#include "src/turbomind/utils/logger.h" +#include "src/turbomind/core/logger.h" namespace turbomind::comm { @@ -346,7 +346,7 @@ class GlooGroupId: public HostGroupId { void Initialize() override { info_ = GlobalStoreFactory::Instance().New(); - TM_LOG_INFO("[TM][COMM] GlooGroupId=%s", info_.c_str()); + TM_LOG_INFO("GlooGroupId={}", info_); } void Export(std::ostream& os) override diff --git a/src/turbomind/comm/gloo/tcp_store.cc b/src/turbomind/comm/gloo/tcp_store.cc index 8de75f508b..5b74a9e098 100644 --- a/src/turbomind/comm/gloo/tcp_store.cc +++ b/src/turbomind/comm/gloo/tcp_store.cc @@ -9,7 +9,7 @@ #include #include "src/turbomind/comm/gloo/tcp_store.h" -#include "src/turbomind/utils/logger.h" +#include "src/turbomind/core/logger.h" namespace turbomind::comm { @@ -148,7 +148,7 @@ TCPStore::TCPStore(const std::string& host, int port) } } catch (const std::exception& e) { - TM_LOG_WARNING("[TM][COMM] Failed to connect to store after %d retries: %s", retry, e.what()); + TM_LOG_WARN("Failed to connect to store after {} retries: {}", retry, e.what()); std::this_thread::sleep_for(std::chrono::seconds(1)); retry += 1; } @@ -209,7 +209,7 @@ void TCPStore::wait(const std::vector& keys, const std::chrono::mil ss << key << " "; } ss << "]"; - TM_LOG_ERROR("[TM][COMM] %s, elapsed %lld s", ss.str().c_str(), elapsed.count()); + TM_LOG_ERROR("{}, elapsed {} s", ss.str(), elapsed.count()); throw 
std::runtime_error("Wait timeout for key(s): " + ss.str()); } std::this_thread::sleep_for(std::chrono::milliseconds(1000)); diff --git a/src/turbomind/comm/host_comm.h b/src/turbomind/comm/host_comm.h index f1da005583..94b4366a35 100644 --- a/src/turbomind/comm/host_comm.h +++ b/src/turbomind/comm/host_comm.h @@ -11,8 +11,8 @@ #include #include "src/turbomind/core/data_type.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/core/serdes.h" -#include "src/turbomind/utils/logger.h" namespace turbomind::comm { diff --git a/src/turbomind/comm/nccl/CMakeLists.txt b/src/turbomind/comm/nccl/CMakeLists.txt index 373558c84e..e35b66af38 100644 --- a/src/turbomind/comm/nccl/CMakeLists.txt +++ b/src/turbomind/comm/nccl/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.11) add_library(nccl_comm STATIC nccl.cu) -target_link_libraries(nccl_comm PRIVATE rms_norm core ${NCCL_LIBRARIES} logger) +target_link_libraries(nccl_comm PRIVATE rms_norm core ${NCCL_LIBRARIES}) target_include_directories(nccl_comm PRIVATE ${NCCL_INCLUDE_DIRS}) set_property(TARGET nccl_comm PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/src/turbomind/comm/nccl/nccl.cu b/src/turbomind/comm/nccl/nccl.cu index 88c32cf166..a70e694bc7 100644 --- a/src/turbomind/comm/nccl/nccl.cu +++ b/src/turbomind/comm/nccl/nccl.cu @@ -13,15 +13,15 @@ #include "src/turbomind/comm/device_comm.h" #include "src/turbomind/comm/host_comm.h" #include "src/turbomind/core/check.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" #include "src/turbomind/utils/string_utils.h" #include "src/turbomind/kernels/norm/rms_norm.h" #define NCCLCHECK(e) \ if (auto ec = e; ec != ncclSuccess) { \ - auto msg = fmtstr("NCCL error %s:%d '%s'", __FILE__, __LINE__, ncclGetErrorString(ec)); \ + auto msg = fmt::format("NCCL error {}:{} '{}'", __FILE__, __LINE__, ncclGetErrorString(ec)); \ throw std::runtime_error(msg.c_str()); \ } @@ -76,16 +76,15 @@ 
static NcclApis& nccl_apis() }; if (version >= NCCL_VERSION(2, 27, 0)) { if (version < NCCL_VERSION(2, 28, 0)) { - TM_LOG_WARNING( - "[NCCL] Window registration may cause memory leaks in NCCL 2.27, use NCCL 2.28+ or disable the feature by setting NCCL_WIN_ENABLE=0."); + TM_LOG_WARN( + "Window registration may cause memory leaks in NCCL 2.27, use NCCL 2.28+ or disable the feature by setting NCCL_WIN_ENABLE=0."); } load_symbol(apis.ncclCommWindowRegister, "ncclCommWindowRegister"); load_symbol(apis.ncclCommWindowDeregister, "ncclCommWindowDeregister"); } else { - TM_LOG_WARNING( - "[NCCL] Window registration is not supported by NCCL %d, use NCCL 2.28+ for better performance.", - version); + TM_LOG_WARN("Window registration is not supported by NCCL {}, use NCCL 2.28+ for better performance.", + version); } if (version >= NCCL_VERSION(2, 19, 0)) { load_symbol(apis.ncclMemAlloc, "ncclMemAlloc"); @@ -97,8 +96,7 @@ static NcclApis& nccl_apis() load_symbol(apis.ncclCommSplit, "ncclCommSplit"); } else { - TM_LOG_WARNING("[NCCL] Splitting communicators is not supported by NCCL %d, use NCCL 2.18+ if needed.", - version); + TM_LOG_WARN("Splitting communicators is not supported by NCCL {}, use NCCL 2.18+ if needed.", version); } return apis; }(); @@ -116,16 +114,16 @@ public: ~NcclCommImpl() { for (const auto& [ptr, _] : handles_.at(0)) { - TM_LOG_WARNING("[NCCL][%d] Buffer %p is not deregistered", global_rank_, ptr); + TM_LOG_WARN("Rank {}: Buffer {} is not deregistered", global_rank_, ptr); } for (const auto& [ptr, size] : buffers_) { - TM_LOG_WARNING("[NCCL][%d] Allocation (%p, %lu) is not freed", global_rank_, ptr, size); + TM_LOG_WARN("Rank {}: Allocation ({}, {}) is not freed", global_rank_, ptr, size); } for (auto& c : groups_) { if (auto ec = ncclCommDestroy(c); ec != ncclSuccess) { - TM_LOG_ERROR("[NCCL][%d] Failed to destroy communicator: %s", global_rank_, ncclGetErrorString(ec)); + TM_LOG_ERROR("Rank {}: Failed to destroy communicator: {}", global_rank_, 
ncclGetErrorString(ec)); } } } @@ -169,7 +167,7 @@ public: buffers_.erase(ptr); } else { - TM_LOG_WARNING("[NCCL][%d] Freeing %p which is not allocated by NcclComm", global_rank_, ptr); + TM_LOG_WARN("Rank {}: Freeing {} which is not allocated by NcclComm", global_rank_, ptr); } } @@ -181,7 +179,7 @@ public: } } else { - TM_LOG_WARNING("[NCCL][%d] Duplicated registration on (%p, %lu)", global_rank_, ptr, size); + TM_LOG_WARN("Rank {}: Duplicated registration on ({}, {})", global_rank_, ptr, size); } } @@ -193,7 +191,7 @@ public: } } else { - TM_LOG_WARNING("[NCCL][%d] Deregistering non-registered address %p", global_rank_, ptr); + TM_LOG_WARN("Rank {}: Deregistering non-registered address {}", global_rank_, ptr); } } diff --git a/src/turbomind/comm/test_comm.cu b/src/turbomind/comm/test_comm.cu index 37415090a6..4cbdf5c2e7 100644 --- a/src/turbomind/comm/test_comm.cu +++ b/src/turbomind/comm/test_comm.cu @@ -740,9 +740,9 @@ struct TestComm { std::mt19937 gen{}; std::uniform_int_distribution dist{0, 31}; // 5 mantissa bits - TM_LOG_INFO("dp_size_0 %d, tp_size_0 %d", dp_size_0, tp_size_0); - TM_LOG_INFO("dp_size_1 %d, tp_size_1 %d", dp_size_1, tp_size_1); - TM_LOG_INFO("inner_tp %d", inner_tp); + TM_LOG_INFO("dp_size_0 {}, tp_size_0 {}", dp_size_0, tp_size_0); + TM_LOG_INFO("dp_size_1 {}, tp_size_1 {}", dp_size_1, tp_size_1); + TM_LOG_INFO("inner_tp {}", inner_tp); vector tokens = tokens_; for (auto& x : tokens) { @@ -817,7 +817,7 @@ struct TestComm { const int tp_rank_1 = d_comm->rank(group1); const int local_id = g_rank / inner_tp; // which local partition this rank belongs to - // TM_LOG_INFO("g_rank %d, dp_rank_0 %d, tp_rank_0 %d, dp_rank_1 %d, tp_rank_1 %d, local_id %d", + // TM_LOG_INFO("g_rank {}, dp_rank_0 {}, tp_rank_0 {}, dp_rank_1 {}, tp_rank_1 {}, local_id {}", // g_rank, // dp_rank_0, // tp_rank_0, @@ -885,7 +885,7 @@ struct TestComm { for (const auto& n : tokens) { if (n % dp_size_1) { if (g_rank == 0) { - TM_LOG_INFO("Skipped %d", n); + 
TM_LOG_INFO("Skipped {}", n); } continue; } diff --git a/src/turbomind/core/CMakeLists.txt b/src/turbomind/core/CMakeLists.txt index 56003a60f1..6e8e6bc49d 100644 --- a/src/turbomind/core/CMakeLists.txt +++ b/src/turbomind/core/CMakeLists.txt @@ -2,6 +2,19 @@ cmake_minimum_required(VERSION 3.11) +include(FetchContent) + +FetchContent_Declare( + concurrentqueue + GIT_REPOSITORY https://github.com/cameron314/concurrentqueue.git + GIT_TAG v1.0.4 + GIT_SHALLOW ON + GIT_PROGRESS TRUE + USES_TERMINAL_DOWNLOAD TRUE + EXCLUDE_FROM_ALL +) +FetchContent_MakeAvailable(concurrentqueue) + add_library(core STATIC check.cc allocator.cc @@ -12,9 +25,10 @@ add_library(core STATIC tensor.cc tensor.cu module.cc - copy.cc) + copy.cc + logger.cc) -target_link_libraries(core PUBLIC cuda_utils logger CUDA::cudart CUDA::cuda_driver) +target_link_libraries(core PUBLIC cuda_utils CUDA::cudart CUDA::cuda_driver fmt::fmt concurrentqueue) set_property(TARGET core PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET core PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) @@ -23,5 +37,8 @@ target_compile_options(core PRIVATE $<$:-Xptxas=-v>) if (BUILD_TEST) add_executable(test_core test_core.cc) - target_link_libraries(test_core PRIVATE core logger Catch2::Catch2WithMain) + target_link_libraries(test_core PRIVATE core Catch2::Catch2WithMain) + + add_executable(test_logger test_logger.cc) + target_link_libraries(test_logger PRIVATE core Catch2::Catch2WithMain) endif () diff --git a/src/turbomind/core/allocator.h b/src/turbomind/core/allocator.h index 08ded8616a..af1a5c0570 100644 --- a/src/turbomind/core/allocator.h +++ b/src/turbomind/core/allocator.h @@ -146,7 +146,7 @@ class StackAllocatorImpl: public AllocatorImpl { p = underlying_impl_->allocate(size); } - // TM_LOG_ERROR("allocate %p, %ld", p, size); + // TM_LOG_ERROR("allocate {}, {}", p, size); size_ += size; ++num_; @@ -159,7 +159,7 @@ class StackAllocatorImpl: public AllocatorImpl { { size = round_up(size, kAlignment); - // 
TM_LOG_ERROR("deallocate %p, %p, %ld", p, cached_ptr_, size); + // TM_LOG_ERROR("deallocate {}, {}, {}", p, cached_ptr_, size); if ((char*)p + size == cached_ptr_) { cached_ptr_ -= size; diff --git a/src/turbomind/core/check.cc b/src/turbomind/core/check.cc index 47ad9a2ec7..16dbe53279 100644 --- a/src/turbomind/core/check.cc +++ b/src/turbomind/core/check.cc @@ -5,7 +5,7 @@ #include #include "src/turbomind/core/check.h" -#include "src/turbomind/utils/logger.h" +#include "src/turbomind/core/logger.h" namespace turbomind::core { @@ -61,7 +61,7 @@ std::string* CheckOpStringBuilder::NewString() return new std::string{oss_->str()}; } -CheckErrorStream::CheckErrorStream(const char* file, int line, const char* expr) +CheckErrorStream::CheckErrorStream(const char* file, int line, const char* expr): file_{file}, line_{line} { oss_ = new std::ostringstream{}; *oss_ << StripSrcPrefix(file) << "(" << line << "): Check failed: " << expr << " "; @@ -75,16 +75,12 @@ CheckErrorStream::CheckErrorStream(const char* file, int line, const char* expr, void CheckErrorStream::Report() { - // ! Be aware of `%` in expr - std::cerr << "[TM][FATAL] " << oss_->str() << "\n"; - std::abort(); + Logger::Instance().LogFatal(SourceLocation{file_, line_}, "{}", oss_->str()); } void ReportNullError(const char* file, int line, const char* expr) { - // ! 
Be aware of `%` in expr - std::cerr << "[TM][FATAL] " << StripSrcPrefix(file) << "(" << line << "): '" << expr << "' Must be non NULL\n"; - std::abort(); + Logger::Instance().LogFatal(SourceLocation{file, line}, "{}: '{}' Must be non NULL", StripSrcPrefix(file), expr); } } // namespace turbomind::core diff --git a/src/turbomind/core/check.h b/src/turbomind/core/check.h index 33b275251f..b8daa66ac3 100644 --- a/src/turbomind/core/check.h +++ b/src/turbomind/core/check.h @@ -54,6 +54,8 @@ class CheckErrorStream { [[noreturn]] void Report(); std::ostringstream* oss_; + const char* file_; + int line_; }; class CheckOpStringBuilder { diff --git a/src/turbomind/core/logger.cc b/src/turbomind/core/logger.cc new file mode 100644 index 0000000000..1aae152ad4 --- /dev/null +++ b/src/turbomind/core/logger.cc @@ -0,0 +1,393 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "src/turbomind/core/logger.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#endif + +#include +#include + +namespace turbomind::core { + +// --------------------------------------------------------------------------- +// Timestamp: MMDD.HH:MM:SS.uuuuuu (dot connects date and time). +// Same approach as glog: system_clock::now(), to_time_t, localtime_r; microseconds +// as time since start of current second (no modulo). 
+// --------------------------------------------------------------------------- +static std::string Timestamp() +{ + auto now = std::chrono::system_clock::now(); + auto t = std::chrono::system_clock::to_time_t(now); + auto us = + std::chrono::duration_cast(now - std::chrono::system_clock::from_time_t(t)).count(); + std::tm tm_buf; +#ifdef _WIN32 + if (::localtime_s(&tm_buf, &t) != 0) { + return "0000.00:00:00.000000"; + } + std::tm* tm = &tm_buf; +#else + std::tm* tm = ::localtime_r(&t, &tm_buf); + if (tm == nullptr) { + return "0000.00:00:00.000000"; + } +#endif + return fmt::format("{:02}{:02}.{:02}:{:02}:{:02}.{:06}", + tm->tm_mon + 1, + tm->tm_mday, + tm->tm_hour, + tm->tm_min, + tm->tm_sec, + static_cast(us)); +} + +// --------------------------------------------------------------------------- +// Basename of __FILE__ (substring after last '/') +// --------------------------------------------------------------------------- +static const char* Basename(const char* file) +{ + const char* last_sep = std::strrchr(file, '/'); +#ifdef _WIN32 + const char* last_bs = std::strrchr(file, '\\'); + if (last_bs && (!last_sep || last_bs > last_sep)) { + last_sep = last_bs; + } +#endif + return last_sep ? 
last_sep + 1 : file; +} + +// --------------------------------------------------------------------------- +// Color palette per log level +// --------------------------------------------------------------------------- +static fmt::text_style StyleFor(Logger::Level level) +{ + switch (level) { + case Logger::Level::kTrace: + return fmt::fg(fmt::color::gray); + case Logger::Level::kDebug: + return fmt::fg(fmt::color::cyan); + case Logger::Level::kInfo: + return {}; + case Logger::Level::kWarning: + return fmt::fg(fmt::color::yellow); + case Logger::Level::kError: + return fmt::fg(fmt::color::red) | fmt::emphasis::bold; + case Logger::Level::kFatal: + return fmt::fg(fmt::color::red) | fmt::emphasis::bold; + default: + return {}; + } +} + +// --------------------------------------------------------------------------- +// Color auto-detection: TM_LOG_COLOR env var → isatty fallback +// --------------------------------------------------------------------------- +static bool UseColor() +{ + static const bool kUseColor = [] { + const char* env = std::getenv("TM_LOG_COLOR"); + if (env != nullptr) { + return std::string_view{env} != "0"; + } +#ifndef _WIN32 + return ::isatty(STDERR_FILENO) != 0; +#else + return true; +#endif + }(); + return kUseColor; +} + +static void PrintStyled(Logger::Level level, std::string_view msg) +{ + if (UseColor()) { + fmt::print(stderr, StyleFor(level), "{}", msg); + } + else { + fmt::print(stderr, "{}", msg); + } +} + +// --------------------------------------------------------------------------- +// AsyncLogWorker internals — entirely private to this translation unit +// --------------------------------------------------------------------------- + +enum class RecordKind +{ + kNormal, + kStop +}; + +struct LogRecord { + RecordKind kind = RecordKind::kNormal; + Logger::Level level = Logger::Level::kInfo; + std::string message; +}; + +class AsyncLogWorker { +public: + static AsyncLogWorker& Instance(); + + AsyncLogWorker(const AsyncLogWorker&) = 
delete; + AsyncLogWorker& operator=(const AsyncLogWorker&) = delete; + + void Enqueue(LogRecord record); + void Stop(); + void OnSignal(); + + ~AsyncLogWorker(); + +private: + AsyncLogWorker(); + + void Run(); + + moodycamel::BlockingConcurrentQueue queue_; + std::thread thread_; + std::atomic_flag stopped_ = ATOMIC_FLAG_INIT; + + std::atomic signal_shutdown_{false}; + std::atomic signal_drain_done_{false}; + std::atomic worker_ready_{false}; + std::thread::id worker_thread_id_{}; +}; + +// --------------------------------------------------------------------------- +// Logger — thread-local frontend +// --------------------------------------------------------------------------- + +Logger& Logger::Instance() +{ + thread_local Logger inst; + return inst; +} + +Logger::Logger() +{ + const char* async_env = std::getenv("TM_LOG_ASYNC"); + if (async_env != nullptr && std::string_view{async_env} == "0") { + async_ = false; + } + + const char* level_env = std::getenv("TM_LOG_LEVEL"); + if (level_env == nullptr) { + return; + } + else { + using Entry = std::pair; + static constexpr std::array kNameToLevel = {{ + {"TRACE", Level::kTrace}, + {"DEBUG", Level::kDebug}, + {"INFO", Level::kInfo}, + {"WARN", Level::kWarning}, + {"WARNING", Level::kWarning}, + {"ERROR", Level::kError}, + {"FATAL", Level::kFatal}, + }}; + + const std::string name_upper = [&] { + std::string s{level_env}; + std::transform(s.begin(), s.end(), s.begin(), [](unsigned char c) { return std::toupper(c); }); + return s; + }(); + auto it = std::find_if( + kNameToLevel.begin(), kNameToLevel.end(), [&](const Entry& e) { return e.first == name_upper; }); + if (it != kNameToLevel.end()) { + level_ = it->second; + } + else { + PrintStyled(Level::kWarning, + fmt::format("[TM][WARN] Invalid TM_LOG_LEVEL='{}'. 
Using default level.\n", level_env)); + } + } +} + +void Logger::set_level(Level level) +{ + level_ = level; +} + +std::string Logger::LevelName(Level level) +{ + switch (level) { + case Level::kTrace: + return "TRACE"; + case Level::kDebug: + return "DEBUG"; + case Level::kInfo: + return "INFO"; + case Level::kWarning: + return "WARN"; + case Level::kError: + return "ERROR"; + case Level::kFatal: + return "FATAL"; + default: + return "UNKNOWN"; + } +} + +std::string Logger::Prefix(Level level, const char* file, int line) +{ + std::string s = fmt::format("[TM][{}][{}]", LevelName(level), Timestamp()); + if (file != nullptr) { + s += fmt::format("[{}:{}]", Basename(file), line); + } + s += " "; + return s; +} + +void Logger::Enqueue(Level level, const char* file, int line, std::string message) +{ + std::string line_str = Prefix(level, file, line); + line_str.reserve(line_str.size() + message.size() + 1); + line_str.append(std::move(message)); + line_str += '\n'; + if (TM_LIKELY(async_ && level != Level::kFatal)) { + LogRecord record; + record.level = level; + record.message = std::move(line_str); + AsyncLogWorker::Instance().Enqueue(std::move(record)); + } + else { + PrintStyled(level, line_str); + } +} + +void Logger::Enqueue(Level level, std::string message) +{ + Enqueue(level, nullptr, 0, std::move(message)); +} + +// --------------------------------------------------------------------------- +// AsyncLogWorker — background I/O thread +// --------------------------------------------------------------------------- + +AsyncLogWorker& AsyncLogWorker::Instance() +{ + static AsyncLogWorker worker; + return worker; +} + +static void OnFatalSignal(int signum) +{ + AsyncLogWorker::Instance().OnSignal(); + ::signal(signum, SIG_DFL); + ::raise(signum); +} + +AsyncLogWorker::AsyncLogWorker() +{ + thread_ = std::thread(&AsyncLogWorker::Run, this); + while (!worker_ready_.load(std::memory_order_acquire)) { + std::this_thread::yield(); + } + + const char* signals_env = 
std::getenv("TM_LOG_SIGNALS"); + if (signals_env == nullptr || std::string_view{signals_env} != "0") { + for (int sig : {SIGSEGV, + SIGABRT, + SIGFPE, + SIGILL +#ifdef SIGBUS + , + SIGBUS +#endif + }) { + ::signal(sig, OnFatalSignal); + } + } +} + +void AsyncLogWorker::OnSignal() +{ + stopped_.test_and_set(); + + if (std::this_thread::get_id() == worker_thread_id_) { + LogRecord record; + while (queue_.try_dequeue(record)) { + if (record.kind != RecordKind::kStop) { + PrintStyled(record.level, record.message); + } + } + } + else { + signal_shutdown_.store(true, std::memory_order_release); + for (int i = 0; i < 2000; ++i) { + if (signal_drain_done_.load(std::memory_order_acquire)) { + break; + } + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + } +} + +void AsyncLogWorker::Stop() +{ + if (stopped_.test_and_set()) { + return; // already stopping or stopped + } + LogRecord stop; + stop.kind = RecordKind::kStop; + queue_.enqueue(std::move(stop)); + thread_.join(); +} + +AsyncLogWorker::~AsyncLogWorker() +{ + Stop(); +} + +void AsyncLogWorker::Enqueue(LogRecord record) +{ + queue_.enqueue(std::move(record)); +} + +void AsyncLogWorker::Run() +{ + worker_thread_id_ = std::this_thread::get_id(); + worker_ready_.store(true, std::memory_order_release); + + LogRecord record; + while (true) { + bool got = queue_.wait_dequeue_timed(record, std::chrono::milliseconds(100)); + + if (got) { + if (record.kind == RecordKind::kStop) { + while (queue_.try_dequeue(record)) { + if (record.kind != RecordKind::kStop) { + PrintStyled(record.level, record.message); + } + } + signal_drain_done_.store(true, std::memory_order_release); + return; + } + PrintStyled(record.level, record.message); + } + + if (signal_shutdown_.load(std::memory_order_acquire)) { + while (queue_.try_dequeue(record)) { + if (record.kind != RecordKind::kStop) { + PrintStyled(record.level, record.message); + } + } + signal_drain_done_.store(true, std::memory_order_release); + return; + } + } +} + +} // 
namespace turbomind::core diff --git a/src/turbomind/core/logger.h b/src/turbomind/core/logger.h new file mode 100644 index 0000000000..72cea2c242 --- /dev/null +++ b/src/turbomind/core/logger.h @@ -0,0 +1,113 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#pragma once + +#include + +#include +#include + +#include "src/turbomind/core/check.h" + +namespace turbomind::core { + +struct SourceLocation { + const char* file; + int line; +}; + +class Logger { +public: + enum class Level + { + kTrace = 0, + kDebug = 10, + kInfo = 20, + kWarning = 30, + kError = 40, + kFatal = 50, + }; + + // Returns the thread-local Logger instance. + static Logger& Instance(); + + Logger(const Logger&) = delete; + Logger& operator=(const Logger&) = delete; + + template + void Log(Level level, fmt::format_string fmt_str, Args&&... args) + { + if (level_ <= level) { + Enqueue(level, nullptr, 0, fmt::format(fmt_str, std::forward(args)...)); + } + } + + template + void Log(Level level, SourceLocation loc, fmt::format_string fmt_str, Args&&... args) + { + if (level_ <= level) { + Enqueue(level, loc.file, loc.line, fmt::format(fmt_str, std::forward(args)...)); + } + } + + template + [[noreturn]] void LogFatal(SourceLocation loc, fmt::format_string fmt_str, Args&&... 
args) + { + Enqueue(Level::kFatal, loc.file, loc.line, fmt::format(fmt_str, std::forward(args)...)); + std::abort(); + } + + void set_level(Level level); + + Level get_level() const + { + return level_; + } + + bool is_async() const + { + return async_; + } + +private: + Logger(); + + void Enqueue(Level level, std::string message); + void Enqueue(Level level, const char* file, int line, std::string message); + + static std::string LevelName(Level level); + static std::string Prefix(Level level, const char* file, int line); + +#ifndef NDEBUG + Level level_ = Level::kDebug; +#else + Level level_ = Level::kInfo; +#endif + bool async_ = true; +}; + +} // namespace turbomind::core + +// --------------------------------------------------------------------------- +// Convenience macros +// --------------------------------------------------------------------------- +#define TM_LOG(level, ...) \ + do { \ + if (turbomind::core::Logger::Instance().get_level() <= (level)) { \ + turbomind::core::Logger::Instance().Log( \ + (level), turbomind::core::SourceLocation{__FILE__, __LINE__}, __VA_ARGS__); \ + } \ + } while (0) + +#define TM_LOG_TRACE(...) TM_LOG(turbomind::core::Logger::Level::kTrace, __VA_ARGS__) +#define TM_LOG_DEBUG(...) TM_LOG(turbomind::core::Logger::Level::kDebug, __VA_ARGS__) +#define TM_LOG_INFO(...) TM_LOG(turbomind::core::Logger::Level::kInfo, __VA_ARGS__) +#define TM_LOG_WARN(...) TM_LOG(turbomind::core::Logger::Level::kWarning, __VA_ARGS__) +#define TM_LOG_ERROR(...) TM_LOG(turbomind::core::Logger::Level::kError, __VA_ARGS__) +#define TM_LOG_FATAL(...) \ + do { \ + turbomind::core::Logger::Instance().LogFatal(turbomind::core::SourceLocation{__FILE__, __LINE__}, \ + __VA_ARGS__); \ + } while (0) + +#define TM_LOG_WARNING(...) 
TM_LOG_WARN(__VA_ARGS__) diff --git a/src/turbomind/core/stream.h b/src/turbomind/core/stream.h index 9727dd5b64..cf411c9ebb 100644 --- a/src/turbomind/core/stream.h +++ b/src/turbomind/core/stream.h @@ -17,7 +17,7 @@ class StreamImpl { ~StreamImpl() { if (auto ec = cudaStreamDestroy(stream_); ec != cudaSuccess) { - TM_LOG_ERROR(cudaGetErrorString(ec)); + TM_LOG_ERROR("{}", cudaGetErrorString(ec)); } stream_ = {}; } @@ -99,7 +99,7 @@ class EventImpl { ~EventImpl() { if (auto ec = cudaEventDestroy(event_); ec != cudaSuccess) { - TM_LOG_ERROR(cudaGetErrorString(ec)); + TM_LOG_ERROR("{}", cudaGetErrorString(ec)); } } diff --git a/src/turbomind/core/tensor.cc b/src/turbomind/core/tensor.cc index b579ea0d86..293d66573e 100644 --- a/src/turbomind/core/tensor.cc +++ b/src/turbomind/core/tensor.cc @@ -156,7 +156,7 @@ void Copy(const Tensor& src, Tensor& dst, Stream& stream) param.kind = cudaMemcpyDefault; if (auto ec = cudaMemcpy3DAsync(¶m, stream.handle()); ec == cudaSuccess) { - TM_LOG_WARNING(cudaGetErrorString(ec)); + TM_LOG_WARN("{}", cudaGetErrorString(ec)); return; } } diff --git a/src/turbomind/core/test_logger.cc b/src/turbomind/core/test_logger.cc new file mode 100644 index 0000000000..059fd6a136 --- /dev/null +++ b/src/turbomind/core/test_logger.cc @@ -0,0 +1,505 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "src/turbomind/core/logger.h" + +#include + +#include +#include +#include +#include +#include + +// POSIX headers needed for: pipe/fork/dup2 (stderr capture), setenv/unsetenv (env +// var tests), waitpid/SIGABRT (signal handling test). All guarded behind #ifndef +// _WIN32 along with the test cases that use them. 
+#ifndef _WIN32 +#include +#include +#endif + +using turbomind::core::Logger; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +TEST_CASE("Logger: SetLevel / GetLevel round-trip", "[logger]") +{ + auto& log = Logger::Instance(); + + log.set_level(Logger::Level::kTrace); + REQUIRE(log.get_level() == Logger::Level::kTrace); + + log.set_level(Logger::Level::kWarning); + REQUIRE(log.get_level() == Logger::Level::kWarning); + + log.set_level(Logger::Level::kError); + REQUIRE(log.get_level() == Logger::Level::kError); + + // Restore default + log.set_level(Logger::Level::kDebug); +} + +// --------------------------------------------------------------------------- +// POSIX-specific tests (stderr capture, env vars, signal handling) +// --------------------------------------------------------------------------- +#ifndef _WIN32 + +// Tests run in sync mode (TM_LOG_ASYNC=0) so log output is written inline and +// CaptureStderr sees complete output when fn() returns. +static bool SetSyncModeForTests() +{ + return ::setenv("TM_LOG_ASYNC", "0", 1) == 0; +} +static bool kSyncMode = SetSyncModeForTests(); + +// --------------------------------------------------------------------------- +// Stderr capture helper +// --------------------------------------------------------------------------- +// Redirects stderr to a pipe, runs `fn`, restores stderr, returns what was written. +// A background reader drains the pipe so it never fills (avoids blocking when +// multiple threads write in sync mode). Requires sync mode (TM_LOG_ASYNC=0). 
+static std::string CaptureStderr(std::function fn) +{ + int saved = ::dup(STDERR_FILENO); + REQUIRE(saved >= 0); + + std::array pipefd{}; + REQUIRE(::pipe(pipefd.data()) == 0); + REQUIRE(::dup2(pipefd[1], STDERR_FILENO) >= 0); + ::close(pipefd[1]); + + std::string output; + std::thread reader([&output, read_fd = pipefd[0]]() { + char buf[4096]; + ssize_t n; + while ((n = ::read(read_fd, buf, sizeof(buf))) > 0) { + output.append(buf, static_cast(n)); + } + ::close(read_fd); + }); + + fn(); + + REQUIRE(::dup2(saved, STDERR_FILENO) >= 0); + ::close(saved); + reader.join(); + return output; +} + +TEST_CASE("Logger: prefix format", "[logger]") +{ + auto& log = Logger::Instance(); + log.set_level(Logger::Level::kTrace); + + auto output = CaptureStderr([&] { log.Log(Logger::Level::kInfo, "hello"); }); + + // New format: [TM][MMDD HH:MM:SS.uuuuuu][LEVEL] message (no file/line for direct Log) + REQUIRE(output.find("[TM]") != std::string::npos); + REQUIRE(output.find("[INFO]") != std::string::npos); + REQUIRE(output.find("hello") != std::string::npos); +} + +TEST_CASE("Logger: format arguments", "[logger]") +{ + auto& log = Logger::Instance(); + log.set_level(Logger::Level::kTrace); + + auto output = CaptureStderr( + [&] { log.Log(Logger::Level::kDebug, "int={} float={:.2f} str={}", 42, 3.14f, std::string("world")); }); + + REQUIRE(output.find("int=42") != std::string::npos); + REQUIRE(output.find("float=3.14") != std::string::npos); + REQUIRE(output.find("str=world") != std::string::npos); +} + +TEST_CASE("Logger: level filtering", "[logger]") +{ + auto& log = Logger::Instance(); + log.set_level(Logger::Level::kWarning); + + auto output = CaptureStderr([&] { + log.Log(Logger::Level::kDebug, "should be suppressed"); + log.Log(Logger::Level::kInfo, "also suppressed"); + log.Log(Logger::Level::kWarning, "should appear"); + log.Log(Logger::Level::kError, "also appears"); + }); + + REQUIRE(output.find("should be suppressed") == std::string::npos); + REQUIRE(output.find("also 
suppressed") == std::string::npos); + REQUIRE(output.find("should appear") != std::string::npos); + REQUIRE(output.find("also appears") != std::string::npos); + + // Restore + log.set_level(Logger::Level::kDebug); +} + +TEST_CASE("Logger: TM_LOG_LEVEL env var", "[logger]") +{ + // This must use a fresh thread to get a new thread_local Logger instance + // that picks up the env var in its constructor. + ::setenv("TM_LOG_LEVEL", "WARNING", /*overwrite=*/1); + + std::string output; + { + std::thread t([&] { + output = CaptureStderr([&] { + auto& log = Logger::Instance(); + log.Log(Logger::Level::kDebug, "env-suppressed"); + log.Log(Logger::Level::kWarning, "env-visible"); + }); + }); + t.join(); + } + + ::unsetenv("TM_LOG_LEVEL"); + + REQUIRE(output.find("env-suppressed") == std::string::npos); + REQUIRE(output.find("env-visible") != std::string::npos); +} + +TEST_CASE("Logger: TM_LOG_LEVEL is case-insensitive", "[logger]") +{ + for (const char* val : {"warning", "Warning", "WARNING"}) { + ::setenv("TM_LOG_LEVEL", val, /*overwrite=*/1); + + std::string output; + { + std::thread t([&] { + output = CaptureStderr([&] { + auto& log = Logger::Instance(); + log.Log(Logger::Level::kDebug, "case-suppressed"); + log.Log(Logger::Level::kWarning, "case-visible"); + }); + }); + t.join(); + } + + REQUIRE(output.find("case-suppressed") == std::string::npos); + REQUIRE(output.find("case-visible") != std::string::npos); + } + + ::unsetenv("TM_LOG_LEVEL"); +} + +TEST_CASE("Logger: macros emit correct prefix", "[logger]") +{ + Logger::Instance().set_level(Logger::Level::kTrace); + + auto output = CaptureStderr([&] { + TM_LOG_TRACE("trace-msg"); + TM_LOG_DEBUG("debug-msg"); + TM_LOG_INFO("info-msg"); + TM_LOG_WARN("warn-msg"); + TM_LOG_ERROR("error-msg"); + }); + + // Format: [TM][MMDD HH:MM:SS.uuuuuu][LEVEL][basename:line] message + REQUIRE(output.find("[TM]") != std::string::npos); + REQUIRE(output.find("[TRACE]") != std::string::npos); + REQUIRE(output.find("[DEBUG]") != 
std::string::npos); + REQUIRE(output.find("[INFO]") != std::string::npos); + REQUIRE(output.find("[WARN]") != std::string::npos); + REQUIRE(output.find("[ERROR]") != std::string::npos); + // Macros pass __FILE__ and __LINE__: expect basename and line in output + REQUIRE(output.find("test_logger.cc:") != std::string::npos); + // Glog-style timestamp: MMDD HH:MM:SS.uuuuuu (contains space, colon, dot for time) + REQUIRE(output.find(".") != std::string::npos); + + Logger::Instance().set_level(Logger::Level::kDebug); +} + +TEST_CASE("Logger: TM_LOG_ASYNC=0 selects sync mode", "[logger]") +{ + // Sync mode: output is written by the calling thread. + ::setenv("TM_LOG_ASYNC", "0", /*overwrite=*/1); + + std::string output; + { + std::thread t([&] { + // Capture stderr inside the new thread. + int saved = ::dup(STDERR_FILENO); + std::array pipefd{}; + REQUIRE(::pipe(pipefd.data()) == 0); + REQUIRE(::dup2(pipefd[1], STDERR_FILENO) >= 0); + ::close(pipefd[1]); + + auto& log = Logger::Instance(); + REQUIRE_FALSE(log.is_async()); + log.Log(Logger::Level::kInfo, "sync-message"); + // Sync mode writes inline. 
+ + REQUIRE(::dup2(saved, STDERR_FILENO) >= 0); + ::close(saved); + + char buf[4096]; + ssize_t n = ::read(pipefd[0], buf, sizeof(buf)); + if (n > 0) { + output.assign(buf, static_cast(n)); + } + ::close(pipefd[0]); + }); + t.join(); + } + + ::unsetenv("TM_LOG_ASYNC"); + + REQUIRE(output.find("sync-message") != std::string::npos); + REQUIRE(output.find("[INFO]") != std::string::npos); +} + +TEST_CASE("Logger: async ordering under concurrent producers", "[logger]") +{ + ::setenv("TM_LOG_ASYNC", "0", 1); // ensure worker threads use sync mode so capture sees all output + Logger::Instance().set_level(Logger::Level::kTrace); + + constexpr int kThreads = 4; + constexpr int kPerThread = 250; + constexpr int kTotal = kThreads * kPerThread; + + auto output = CaptureStderr([&] { + std::vector threads; + threads.reserve(kThreads); + for (int t = 0; t < kThreads; ++t) { + threads.emplace_back([t] { + for (int i = 0; i < kPerThread; ++i) { + TM_LOG_INFO("thread={} i={}", t, i); + } + }); + } + for (auto& th : threads) { + th.join(); + } + }); + + // Count lines — each message is exactly one line. + int lines = 0; + for (char c : output) { + if (c == '\n') { + ++lines; + } + } + + REQUIRE(lines == kTotal); +} + +// --------------------------------------------------------------------------- +// Signal-handling drain test (fork-based) +// --------------------------------------------------------------------------- +// Verifies that OnFatalSignal drains the async worker's queue before +// re-raising the signal. Because raising SIGABRT kills the process, we +// fork a child that enables async mode, enqueues messages, and raises +// SIGABRT. The parent reads the child's stderr via a pipe and checks that +// every message is present and that the child died from SIGABRT. +// +// IMPORTANT: This test relies on AsyncLogWorker::Instance() never having +// been called in the parent process. All preceding tests use sync mode +// (TM_LOG_ASYNC=0) and never trigger the singleton. 
If a future test +// triggers async mode before this one, the fork will inherit a destroyed +// worker thread and the singleton will be in an invalid state. +// --------------------------------------------------------------------------- + +TEST_CASE("Logger: async signal handler drains queue on fatal signal", "[logger][signal]") +{ + // Guard: verify we haven't triggered async mode in this process. + // All preceding tests force TM_LOG_ASYNC=0 via kSyncMode. If a future + // test enables async before this one, the fork inherits a destroyed + // AsyncLogWorker thread and the singleton is in an invalid state. + const char* async_env = std::getenv("TM_LOG_ASYNC"); + REQUIRE((async_env != nullptr && std::string_view{async_env} == "0")); + + constexpr int kMsgCount = 20; + std::vector markers; + markers.reserve(kMsgCount); + for (int i = 0; i < kMsgCount; ++i) { + markers.push_back(fmt::format("drain-test-msg-{:03d}-Ax9B{}", i, i)); + } + + int pipefd[2]; + REQUIRE(::pipe(pipefd) == 0); + + pid_t pid = ::fork(); + REQUIRE(pid >= 0); + + if (pid == 0) { + // ---- CHILD PROCESS ---- + ::close(pipefd[0]); + ::dup2(pipefd[1], STDERR_FILENO); + ::close(pipefd[1]); + + // Enable async mode for any new Logger instance. + ::setenv("TM_LOG_ASYNC", "1", 1); + ::setenv("TM_LOG_LEVEL", "TRACE", 1); + + // Spawn a fresh thread to get a new thread_local Logger that picks + // up TM_LOG_ASYNC=1 (async mode). + std::thread worker([&] { + auto& log = Logger::Instance(); + if (!log.is_async()) { + fmt::print(stderr, "CHILD-ERROR: not in async mode\n"); + ::_exit(2); + } + + // Enqueue messages rapidly. Some may be drained by the worker + // thread before the signal fires; others will remain in the + // queue and must be drained by OnFatalSignal. + for (int i = 0; i < kMsgCount; ++i) { + log.Log(Logger::Level::kInfo, markers[i]); + } + + // Small delay so some messages are consumed by the normal worker + // loop, creating a realistic mix of already-printed and queued. 
+ ::usleep(1000); + + // OnFatalSignal should: Stop() → drain queue → restore SIG_DFL → re-raise. + ::raise(SIGABRT); + ::_exit(0); + }); + + worker.join(); + ::_exit(1); + } + + // ---- PARENT PROCESS ---- + ::close(pipefd[1]); + + std::string child_output; + { + char buf[4096]; + ssize_t n; + while ((n = ::read(pipefd[0], buf, sizeof(buf))) > 0) { + child_output.append(buf, static_cast(n)); + } + ::close(pipefd[0]); + } + + int wstatus = 0; + REQUIRE(::waitpid(pid, &wstatus, 0) == pid); + + // Child should have been killed by SIGABRT. + REQUIRE(WIFSIGNALED(wstatus)); + REQUIRE(WTERMSIG(wstatus) == SIGABRT); + + // Every marker must appear in the child's stderr output. + for (int i = 0; i < kMsgCount; ++i) { + REQUIRE(child_output.find(markers[i]) != std::string::npos); + } +} + +// --------------------------------------------------------------------------- +// TM_LOG_FATAL abort test (fork-based) +// --------------------------------------------------------------------------- +TEST_CASE("Logger: TM_LOG_FATAL aborts the process", "[logger][fatal]") +{ + int pipefd[2]; + REQUIRE(::pipe(pipefd) == 0); + + pid_t pid = ::fork(); + REQUIRE(pid >= 0); + + if (pid == 0) { + // ---- CHILD PROCESS ---- + ::close(pipefd[0]); + ::dup2(pipefd[1], STDERR_FILENO); + ::close(pipefd[1]); + + ::setenv("TM_LOG_ASYNC", "0", 1); + ::setenv("TM_LOG_LEVEL", "TRACE", 1); + + std::thread worker([&] { + // Reset Catch2's inherited SIGABRT handler so std::abort() + // kills the process rather than triggering Catch2's reporter. 
+ ::signal(SIGABRT, SIG_DFL); + TM_LOG_FATAL("fatal-test-marker-{}", 42); + }); + worker.join(); + ::_exit(0); // Should not reach here + } + + // ---- PARENT PROCESS ---- + ::close(pipefd[1]); + + std::string child_output; + { + char buf[4096]; + ssize_t n; + while ((n = ::read(pipefd[0], buf, sizeof(buf))) > 0) { + child_output.append(buf, static_cast(n)); + } + ::close(pipefd[0]); + } + + int wstatus = 0; + REQUIRE(::waitpid(pid, &wstatus, 0) == pid); + + // Child should have been killed by SIGABRT (from std::abort). + REQUIRE(WIFSIGNALED(wstatus)); + REQUIRE(WTERMSIG(wstatus) == SIGABRT); + + // Fatal message should appear in output. + REQUIRE(child_output.find("fatal-test-marker-42") != std::string::npos); + REQUIRE(child_output.find("[FATAL]") != std::string::npos); +} + +// --------------------------------------------------------------------------- +// Color disable test +// --------------------------------------------------------------------------- +TEST_CASE("Logger: TM_LOG_COLOR=0 disables ANSI escape codes", "[logger][color]") +{ + Logger::Instance().set_level(Logger::Level::kTrace); + + auto output = CaptureStderr([&] { + // Color is determined at first use; since this test process started + // with TM_LOG_COLOR potentially unset, we use a forked child to + // control the env var from scratch. + }); + + // Fork a child with TM_LOG_COLOR=0 to test colorless output. 
+ int pipefd[2]; + REQUIRE(::pipe(pipefd) == 0); + + pid_t pid = ::fork(); + REQUIRE(pid >= 0); + + if (pid == 0) { + ::close(pipefd[0]); + ::dup2(pipefd[1], STDERR_FILENO); + ::close(pipefd[1]); + + ::setenv("TM_LOG_COLOR", "0", 1); + ::setenv("TM_LOG_ASYNC", "0", 1); + ::setenv("TM_LOG_LEVEL", "TRACE", 1); + + std::thread worker([&] { + auto& log = Logger::Instance(); + log.Log(Logger::Level::kError, "no-color-marker"); + }); + worker.join(); + ::_exit(0); + } + + ::close(pipefd[1]); + + std::string child_output; + { + char buf[4096]; + ssize_t n; + while ((n = ::read(pipefd[0], buf, sizeof(buf))) > 0) { + child_output.append(buf, static_cast(n)); + } + ::close(pipefd[0]); + } + + int wstatus = 0; + REQUIRE(::waitpid(pid, &wstatus, 0) == pid); + REQUIRE(WIFEXITED(wstatus)); + + // Message must be present. + REQUIRE(child_output.find("no-color-marker") != std::string::npos); + + // Must NOT contain ANSI escape sequences (\x1b[). + REQUIRE(child_output.find("\x1b[") == std::string::npos); +} + +#endif // _WIN32 diff --git a/src/turbomind/engine/engine.cc b/src/turbomind/engine/engine.cc index 4ecd7fdcd4..391a034dae 100644 --- a/src/turbomind/engine/engine.cc +++ b/src/turbomind/engine/engine.cc @@ -17,10 +17,10 @@ #include "src/turbomind/engine/request.h" #include "src/turbomind/core/copy.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/models/language_model.h" #include "src/turbomind/models/llama/SequenceManager.h" #include "src/turbomind/models/llama/llama_params.h" -#include "src/turbomind/utils/logger.h" #include "src/turbomind/utils/metrics.h" // #include "dbg.h" @@ -163,7 +163,7 @@ struct Engine::Impl { Engine::Impl::~Impl() { - TM_LOG_INFO(__PRETTY_FUNCTION__); + TM_LOG_INFO("{}", __PRETTY_FUNCTION__); inbound_.close(); outbound_.close(); if (internal_thread_.joinable()) { @@ -237,9 +237,9 @@ void Engine::Impl::CreateSequenceManager() const auto max_cached_tokens = seq_mgr_->max_block_count() * (size_t)cache_block_seq_len * 
param_.attn_cp_size; session_len_trunc_ = std::min(max_cached_tokens, (size_t)param_.session_len); - TM_LOG_INFO("max cached tokens: %lld", max_cached_tokens); + TM_LOG_INFO("max cached tokens: {}", max_cached_tokens); if (session_len_trunc_ != param_.session_len) { - TM_LOG_WARNING("`session_len` truncated to %d due to limited KV cache memory", session_len_trunc_); + TM_LOG_WARN("`session_len` truncated to {} due to limited KV cache memory", session_len_trunc_); } } @@ -259,11 +259,11 @@ void Engine::Impl::Validate(Requests& infer_reqs, Requests& kill_reqs) auto validate = [&](auto& reqs, const char* type, bool is_infer) { for (const auto& r : reqs) { if (occur[r->id] > 1) { - TM_LOG_ERROR("Skip conflicting %s request for ID %lu", type, r->id); + TM_LOG_ERROR("Skip conflicting {} request for ID {}", type, r->id); r->ec = Request::kConflict; } if (!r->ec && is_infer && has_linear_attention && !r->session.end_flag) { - TM_LOG_ERROR("Skip inconsistent %s request for ID %lu. Linear attention only supports stateless " + TM_LOG_ERROR("Skip inconsistent {} request for ID {}. Linear attention only supports stateless " "requests", type, r->id); @@ -272,13 +272,13 @@ void Engine::Impl::Validate(Requests& infer_reqs, Requests& kill_reqs) if (param_.enable_prefix_caching) { if (r->session.step != 0) { // Prefix caching is incompatible with interactive mode - TM_LOG_ERROR("Skip inconsistent %s request for ID %lu step %d", type, r->id, r->session.step); + TM_LOG_ERROR("Skip inconsistent {} request for ID {} step {}", type, r->id, r->session.step); r->ec = Request::kInconsistency; } else if (r->gen_cfg.output_logits == GenerationConfig::kAll || r->gen_cfg.output_last_hidden_state == GenerationConfig::kAll) { // Prefix caching is incompatible with outputting all tokens' logits or last_hidden_state - TM_LOG_ERROR("Skip inconsistent %s request for ID %lu. It cannot output logits or " + TM_LOG_ERROR("Skip inconsistent {} request for ID {}. 
It cannot output logits or " "last_hidden_states for all tokens", type, r->id); @@ -350,8 +350,8 @@ void Engine::Impl::Interrupt(RequestCache& c) } else { if (s.recurrent_states && c.seq_len != s.cache_len) { - TM_LOG_WARNING( - "[Engine][Interrupt] Invalidating cache for ID %llu due to linear-state/cache mismatch (%d vs %d)", + TM_LOG_WARN( + "[Engine][Interrupt] Invalidating cache for ID {} due to linear-state/cache mismatch ({} vs {})", s.id, c.seq_len, s.cache_len); @@ -413,7 +413,7 @@ void Engine::Impl::Accept(const Requests& rs, vector& signals) } else if (s > ptr->tokens.size()) { if (tp_rank_ == 0) { - TM_LOG_WARNING("[ProcessInferRequests] Skipping invalid step (%d) setting for ID %lu", s, ptr->id); + TM_LOG_WARN("Skipping invalid step ({}) setting for ID {}", s, ptr->id); } s = ptr->tokens.size(); } @@ -474,8 +474,8 @@ void Engine::Impl::Accept(const Requests& rs, vector& signals) if (tp_rank_ == 0) { const int trunc_output_len = max_seq_len - c->prompt_len; // clang-format off - TM_LOG_WARNING("[ProcessInferRequests] [%ld] total sequence length (%d + %d) exceeds `session_len` (%d), `max_new_tokens` is truncated to %d", - (long)seq.id, c->prompt_len, c->gen_cfg.max_new_tokens, session_len_trunc_, trunc_output_len); + TM_LOG_WARN("ID {}: total sequence length ({} + {}) exceeds `session_len` ({}), `max_new_tokens` is truncated to {}", + seq.id, c->prompt_len, c->gen_cfg.max_new_tokens, session_len_trunc_, trunc_output_len); // clang-format on } } @@ -812,7 +812,7 @@ void Engine::Impl::InternalThreadEntry() } if (rs->abort) { - TM_LOG_INFO("[Engine] stop requested."); + TM_LOG_INFO("stop requested."); break; } diff --git a/src/turbomind/engine/gateway.cc b/src/turbomind/engine/gateway.cc index 47b5ecd383..82e3bd119a 100644 --- a/src/turbomind/engine/gateway.cc +++ b/src/turbomind/engine/gateway.cc @@ -40,7 +40,7 @@ void Gateway::push(std::shared_ptr r) rank = next_.fetch_add(1, std::memory_order_relaxed) % size_; } else { - TM_LOG_ERROR("[Gateway] No queues 
available for submitting the request"); + TM_LOG_ERROR("No queues available for submitting the request"); notify({[r = std::move(r)] { UpdateState(*r, Request::kNoQueue, 0); }}); return; } @@ -49,7 +49,7 @@ void Gateway::push(std::shared_ptr r) queues_[rank]->push({std::move(r)}); } else { - TM_LOG_ERROR("[Gateway] Failed to find a binded queue for %lu", r->session.id); + TM_LOG_ERROR("Failed to find a binded queue for {}", r->session.id); notify({[r = std::move(r)] { UpdateState(*r, Request::kInvalid, 0); }}); } } @@ -134,7 +134,7 @@ void Gateway::kill(std::shared_ptr r) queues_[rank]->kill(std::move(r)); } else { - TM_LOG_ERROR("[Gateway] Failed to find a binded queue for %lu", r->session.id); + TM_LOG_ERROR("Failed to find a binded queue for {}", r->session.id); notify({[r = std::move(r)] { // UpdateState(*r, Request::kInvalid, 0); }}); diff --git a/src/turbomind/engine/gateway.h b/src/turbomind/engine/gateway.h index b389557a91..ddd80892d0 100644 --- a/src/turbomind/engine/gateway.h +++ b/src/turbomind/engine/gateway.h @@ -10,10 +10,10 @@ #include "src/turbomind/comm/host_comm.h" #include "src/turbomind/core/check.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/engine/request.h" #include "src/turbomind/engine/request_queue.h" #include "src/turbomind/engine/signal_buffer.h" -#include "src/turbomind/utils/logger.h" namespace turbomind { @@ -33,7 +33,7 @@ class SequenceBinding { std::lock_guard lock{mutex_}; for (const auto& x : seq_ids) { if (auto [it, success] = map_.emplace(x, rank); !success) { - TM_LOG_WARNING("[TM][Gateway] Duplicated binding for %lu, %d vs %d", x, rank, it->second); + TM_LOG_WARN("Duplicated binding for {}, {} vs {}", x, rank, it->second); } } } @@ -44,10 +44,10 @@ class SequenceBinding { for (const auto& x : seq_ids) { auto it = map_.find(x); if (it == map_.end()) { - TM_LOG_WARNING("[TM][Gateway] No entry found for unbinding %lu, %d", x, rank); + TM_LOG_WARN("No entry found for unbinding {}, {}", x, rank); } else if 
(it->second != rank) { - TM_LOG_WARNING("[TM][Gateway] Mismatched entry for unbinding %lu, %d vs %d", x, rank, it->second); + TM_LOG_WARN("Mismatched entry for unbinding {}, {} vs {}", x, rank, it->second); } else { map_.erase(it); @@ -84,7 +84,7 @@ class Gateway { void set_threshold(int value) { - TM_LOG_INFO("set threshold %d -> %d", dp_thr_, value); + TM_LOG_INFO("set threshold {} -> {}", dp_thr_, value); dp_thr_ = value; } diff --git a/src/turbomind/engine/request.cc b/src/turbomind/engine/request.cc index d4d6097d75..f8bc0b28e0 100644 --- a/src/turbomind/engine/request.cc +++ b/src/turbomind/engine/request.cc @@ -53,10 +53,10 @@ void UpdateState(Request& r, int status, int seq_len) } } catch (const std::exception& e) { - TM_LOG_ERROR("Error invoking callback for (%lu): %s", r.id, e.what()); + TM_LOG_ERROR("Error invoking callback for ({}): {}", r.id, e.what()); } catch (...) { - TM_LOG_ERROR("Unknown error invoking callback for (%lu)", r.id); + TM_LOG_ERROR("Unknown error invoking callback for ({})", r.id); } } diff --git a/src/turbomind/generation/logits_processor.cc b/src/turbomind/generation/logits_processor.cc index 7448bb6b42..8de90a08d1 100644 --- a/src/turbomind/generation/logits_processor.cc +++ b/src/turbomind/generation/logits_processor.cc @@ -66,7 +66,7 @@ void LogitsProcessor::Forward(int phase, TensorMap& env) { // apply repetition penalty -> ban bad words -> min length penalty -> temperature penalty // the order is same with transformerss - TM_LOG_DEBUG("%s start", __PRETTY_FUNCTION__); + TM_LOG_DEBUG("{} start", __PRETTY_FUNCTION__); Tensor_ logits = env.at("logits"); const Buffer_ token_ids_ptrs = env.at("token_ids_ptrs").buffer(); @@ -115,12 +115,12 @@ void LogitsProcessor::Forward(int phase, TensorMap& env) sync_check_cuda_error(); } - TM_LOG_DEBUG("%s stop", __PRETTY_FUNCTION__); + TM_LOG_DEBUG("{} stop", __PRETTY_FUNCTION__); } void LogitsProcessor::Setup(int phase, TensorMap& env) { - TM_LOG_DEBUG("%s start", __PRETTY_FUNCTION__); + 
TM_LOG_DEBUG("{} start", __PRETTY_FUNCTION__); auto& d = *data_.at(phase); @@ -199,13 +199,13 @@ void LogitsProcessor::Setup(int phase, TensorMap& env) continue; } if (TM_UNLIKELY(eos_ids.size() > kMaxEndIdsSize)) { - TM_LOG_WARNING("[InitializeSampling] [%ld] eos length (%d) exceeds %d, truncated to %d", - (long)rs[i]->req->id, - (int)eos_ids.size(), - kMaxEndIdsSize, - kMaxEndIdsSize); + TM_LOG_WARN("ID {}: eos length ({}) exceeds {}, truncated to {}", + rs[i]->req->id, + eos_ids.size(), + kMaxEndIdsSize, + kMaxEndIdsSize); } - std::copy_n(eos_ids.begin(), std::min((int)eos_ids.size(), kMaxEndIdsSize), h_end_ids); + std::copy_n(eos_ids.begin(), std::min(static_cast(eos_ids.size()), kMaxEndIdsSize), h_end_ids); h_end_ids += max_length; } copy(buf_->end_ids_buf, bsz * max_length, d.end_ids_buf); @@ -213,7 +213,7 @@ void LogitsProcessor::Setup(int phase, TensorMap& env) } } - TM_LOG_DEBUG("%s stop", __PRETTY_FUNCTION__); + TM_LOG_DEBUG("{} stop", __PRETTY_FUNCTION__); } } // namespace turbomind diff --git a/src/turbomind/generation/sampling.cc b/src/turbomind/generation/sampling.cc index c799c76369..6b1d9d38f2 100644 --- a/src/turbomind/generation/sampling.cc +++ b/src/turbomind/generation/sampling.cc @@ -23,8 +23,8 @@ #include "src/turbomind/engine/batch.h" #include "src/turbomind/engine/request.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/utils/constant.h" -#include "src/turbomind/utils/logger.h" namespace turbomind { @@ -87,7 +87,7 @@ void Sampling::Forward(int phase, TensorMap& args) // step2: // - sampling from left and sorted scores - TM_LOG_DEBUG("%s start", __PRETTY_FUNCTION__); + TM_LOG_DEBUG("{} start", __PRETTY_FUNCTION__); auto& d = *data_.at(phase); @@ -168,7 +168,7 @@ void Sampling::Forward(int phase, TensorMap& args) sync_check_cuda_error(); } - TM_LOG_DEBUG("%s stop", __PRETTY_FUNCTION__); + TM_LOG_DEBUG("{} stop", __PRETTY_FUNCTION__); } void Sampling::Setup(int phase, TensorMap& env) diff --git 
a/src/turbomind/generation/utils.h b/src/turbomind/generation/utils.h index f31e8fd19c..b9d91457d3 100644 --- a/src/turbomind/generation/utils.h +++ b/src/turbomind/generation/utils.h @@ -36,12 +36,12 @@ void init_stop_bad_words(G getter, const char* key, const Rs& rs, T* h_buf, T* d offsets.begin() + std::min(kMaxStopBadWordsLen, (int)offsets.size()), kMaxStopBadWordsLen) - offsets.begin(); - TM_LOG_WARNING("[InitializeSampling] [%ld] %s length (%d) exceeds %d, truncated to %d", - rs[i]->req->id, - key, - offsets.back(), - kMaxStopBadWordsLen, - trunc_offset_size); + TM_LOG_WARN("ID {}: {} length ({}) exceeds {}, truncated to {}", + rs[i]->req->id, + key, + offsets.back(), + kMaxStopBadWordsLen, + trunc_offset_size); if (trunc_offset_size > 0) { int trunc_token_size = offsets[trunc_offset_size - 1]; copy_tokens[i] = std::make_pair(token_ids.data(), trunc_token_size); diff --git a/src/turbomind/kernels/attention/CMakeLists.txt b/src/turbomind/kernels/attention/CMakeLists.txt index a809860436..5ffd5ea61f 100644 --- a/src/turbomind/kernels/attention/CMakeLists.txt +++ b/src/turbomind/kernels/attention/CMakeLists.txt @@ -32,7 +32,9 @@ if (BUILD_TEST) nvidia::cutlass::cutlass models unfused_attention_kernels - logger + core + cuda_utils + fmt::fmt cublas) add_executable(test_quant test_quant.cu test_utils.cu) diff --git a/src/turbomind/kernels/decoding_kernels.cu b/src/turbomind/kernels/decoding_kernels.cu index af7eca9aee..6952e6eb90 100644 --- a/src/turbomind/kernels/decoding_kernels.cu +++ b/src/turbomind/kernels/decoding_kernels.cu @@ -18,6 +18,7 @@ #include "src/turbomind/kernels/reduce_kernel_utils.cuh" #include "src/turbomind/utils/cuda_type_utils.cuh" #include "src/turbomind/utils/cuda_utils.h" +#include "src/turbomind/utils/string_utils.h" namespace turbomind { diff --git a/src/turbomind/kernels/logprob_kernels.cu b/src/turbomind/kernels/logprob_kernels.cu index 0357e88f6f..702b2148e7 100644 --- a/src/turbomind/kernels/logprob_kernels.cu +++ 
b/src/turbomind/kernels/logprob_kernels.cu @@ -26,10 +26,10 @@ #include "3rdparty/cub/cub.cuh" #endif +#include "src/turbomind/core/logger.h" #include "src/turbomind/kernels/logprob_kernels.h" #include "src/turbomind/kernels/reduce_kernel_utils.cuh" #include "src/turbomind/macro.h" -#include "src/turbomind/utils/logger.h" namespace turbomind { @@ -159,7 +159,7 @@ void invokeLogProbFromLogits(float* cum_log_probs, // input_lengths: [batch_size] // workspace: workspace buffer of size at least sizeof(float) * max_input_length * batch_size. - TM_LOG_DEBUG(__PRETTY_FUNCTION__); + TM_LOG_DEBUG("{}", __PRETTY_FUNCTION__); // block_size should be multiple of 32 to use warpReduceMax. const int block_size = vocab_size < 1024 ? (vocab_size + 31) / 32 * 32 : 1024; assert(block_size % 32 == 0); diff --git a/src/turbomind/kernels/sampling_penalty_kernels.cu b/src/turbomind/kernels/sampling_penalty_kernels.cu index 21ddd49271..d424aeba61 100644 --- a/src/turbomind/kernels/sampling_penalty_kernels.cu +++ b/src/turbomind/kernels/sampling_penalty_kernels.cu @@ -187,7 +187,7 @@ void ApplyRepetitionPenalty(Tensor& logits, if (smem_size > (48 << 10)) { TM_CHECK_EQ(cudaFuncSetAttribute(func, cudaFuncAttributeMaxDynamicSharedMemorySize, smem_size), 0); } - TM_LOG_DEBUG("smem_size = %d", smem_size); + TM_LOG_DEBUG("smem_size = {}", smem_size); func<<>>( logits.data(), penalties.data(), token_ids_ptrs.data(), sequence_length.data(), vocab_size, mask_size); }; diff --git a/src/turbomind/kernels/sampling_topk_kernels.cu b/src/turbomind/kernels/sampling_topk_kernels.cu index 51974e076f..f564e1ea13 100644 --- a/src/turbomind/kernels/sampling_topk_kernels.cu +++ b/src/turbomind/kernels/sampling_topk_kernels.cu @@ -30,6 +30,7 @@ #include "src/turbomind/kernels/sampling_topk_kernels.h" #include "src/turbomind/utils/constant.h" +#include "src/turbomind/utils/string_utils.h" namespace turbomind { diff --git a/src/turbomind/kernels/sampling_topk_kernels.h 
b/src/turbomind/kernels/sampling_topk_kernels.h index e1e0d5bbad..d4ae94fb6a 100644 --- a/src/turbomind/kernels/sampling_topk_kernels.h +++ b/src/turbomind/kernels/sampling_topk_kernels.h @@ -16,7 +16,7 @@ */ #pragma once -#include "src/turbomind/utils/logger.h" +#include "src/turbomind/core/logger.h" #include namespace turbomind { diff --git a/src/turbomind/models/CMakeLists.txt b/src/turbomind/models/CMakeLists.txt index 894518f547..10b8fe9d00 100644 --- a/src/turbomind/models/CMakeLists.txt +++ b/src/turbomind/models/CMakeLists.txt @@ -40,7 +40,6 @@ target_link_libraries(models PUBLIC gpt_kernels memory_utils cuda_utils - logger anomaly_handler) target_compile_options(models PRIVATE $<$:-Xptxas=-v --generate-line-info --threads 8>) diff --git a/src/turbomind/models/llama/Barrier.h b/src/turbomind/models/llama/Barrier.h index 509290a5fb..9959e1cb01 100644 --- a/src/turbomind/models/llama/Barrier.h +++ b/src/turbomind/models/llama/Barrier.h @@ -2,8 +2,8 @@ #pragma once +#include "src/turbomind/core/logger.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" #ifndef _MSC_VER #include #endif @@ -16,7 +16,7 @@ class Barrier { public: Barrier(unsigned count) { - TM_LOG_INFO("Barrier(%d)", (int)count); + TM_LOG_INFO("Barrier({})", (int)count); FT_CHECK(count == 1); } diff --git a/src/turbomind/models/llama/BlockManager.cc b/src/turbomind/models/llama/BlockManager.cc index 707430e80e..b8a5001cf9 100644 --- a/src/turbomind/models/llama/BlockManager.cc +++ b/src/turbomind/models/llama/BlockManager.cc @@ -2,9 +2,9 @@ #include +#include "src/turbomind/core/logger.h" #include "src/turbomind/models/llama/BlockManager.h" #include "src/turbomind/utils/debug_utils.h" -#include "src/turbomind/utils/logger.h" #include "src/turbomind/utils/string_utils.h" namespace turbomind { @@ -30,9 +30,9 @@ BlockManager::BlockManager( chunk_size_ = chunk_size; } - TM_LOG_INFO("[BlockManager] block_size = %.3f MB", (float)block_size_ / (1 << 20)); - 
TM_LOG_INFO("[BlockManager] max_block_count = %d", max_block_count_); - TM_LOG_INFO("[BlockManager] chunk_size = %d", chunk_size_); + TM_LOG_INFO("block_size = {:.3f} MB", (float)block_size_ / (1 << 20)); + TM_LOG_INFO("max_block_count = {}", max_block_count_); + TM_LOG_INFO("chunk_size = {}", chunk_size_); blocks_.reserve(max_block_count_); diff --git a/src/turbomind/models/llama/BlockManager.h b/src/turbomind/models/llama/BlockManager.h index 8b50353a25..de2d9e0384 100644 --- a/src/turbomind/models/llama/BlockManager.h +++ b/src/turbomind/models/llama/BlockManager.h @@ -3,9 +3,9 @@ #pragma once #include "src/turbomind/core/allocator.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/models/llama/Barrier.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" #include #include #include diff --git a/src/turbomind/models/llama/BlockTrie.cc b/src/turbomind/models/llama/BlockTrie.cc index d63bbcf1fd..4046741bd2 100644 --- a/src/turbomind/models/llama/BlockTrie.cc +++ b/src/turbomind/models/llama/BlockTrie.cc @@ -41,7 +41,7 @@ std::tuple BlockTrie::Match(const Sequence& seq) first += block_seq_len_; } else { - TM_LOG_WARNING("hash collision detected"); + TM_LOG_WARN("hash collision detected"); break; } } @@ -85,7 +85,7 @@ std::tuple BlockTrie::Cache(const Sequence& seq, const std: node->block_unique_id = block_unique_id; } else { - TM_LOG_WARNING("[BlockTrie][cache] Hash collision detected"); + TM_LOG_WARN("Hash collision detected"); break; } } @@ -102,7 +102,7 @@ std::tuple BlockTrie::Cache(const Sequence& seq, const std: cache_block_unique_ids.emplace_back(block_unique_id); } - TM_LOG_INFO("[BlockTrie][cache] %d new tokens cached", new_cached); + TM_LOG_INFO("{} new tokens cached", new_cached); return std::make_tuple(cache_block_ids, cache_block_unique_ids); } diff --git a/src/turbomind/models/llama/CMakeLists.txt b/src/turbomind/models/llama/CMakeLists.txt index aaa7467f64..121b80f09a 100644 --- 
a/src/turbomind/models/llama/CMakeLists.txt +++ b/src/turbomind/models/llama/CMakeLists.txt @@ -45,5 +45,4 @@ target_link_libraries(Llama PUBLIC CUDA::cudart gpt_kernels memory_utils cuda_utils - logger anomaly_handler) diff --git a/src/turbomind/models/llama/GatedDeltaNetLayer.cc b/src/turbomind/models/llama/GatedDeltaNetLayer.cc index e2e4824c03..e5a7459143 100644 --- a/src/turbomind/models/llama/GatedDeltaNetLayer.cc +++ b/src/turbomind/models/llama/GatedDeltaNetLayer.cc @@ -2,10 +2,10 @@ #include "src/turbomind/core/allocator.h" #include "src/turbomind/core/check.h" #include "src/turbomind/core/data_type.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/models/llama/SequenceManager.h" #include "src/turbomind/models/llama/gated_delta_net_kernels.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" namespace turbomind { @@ -36,8 +36,8 @@ GatedDeltaNetLayer::GatedDeltaNetLayer(const ModelParam& model, ++num_linear_layers_; } - TM_LOG_INFO("GatedDeltaNetLayer: num_k=%d num_v=%d k_dim=%d v_dim=%d " - "conv_dim=%d d_conv=%d num_linear_layers=%d", + TM_LOG_INFO("GatedDeltaNetLayer: num_k={} num_v={} k_dim={} v_dim={} " + "conv_dim={} d_conv={} num_linear_layers={}", num_k_heads_, num_v_heads_, key_dim_, diff --git a/src/turbomind/models/llama/LlamaDecoderLayerWeight.cc b/src/turbomind/models/llama/LlamaDecoderLayerWeight.cc index f9774671f1..ca7fc25e97 100644 --- a/src/turbomind/models/llama/LlamaDecoderLayerWeight.cc +++ b/src/turbomind/models/llama/LlamaDecoderLayerWeight.cc @@ -26,10 +26,10 @@ #include "src/turbomind/models/llama/LlamaDecoderLayerWeight.h" #include "src/turbomind/core/data_type.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/models/llama/LlamaDenseWeight.h" #include "src/turbomind/models/llama/llama_params.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" namespace turbomind { @@ -40,7 +40,7 @@ static bool is_fuse_silu_act() if (str) { try { 
auto v = std::stoi(str) != 0; - TM_LOG_INFO("TM_FUSE_SILU_ACT=%d", (int)v); + TM_LOG_INFO("TM_FUSE_SILU_ACT={}", (int)v); return v; } catch (...) { diff --git a/src/turbomind/models/llama/LlamaLinear.cu b/src/turbomind/models/llama/LlamaLinear.cu index ab0908546a..8578f0144e 100644 --- a/src/turbomind/models/llama/LlamaLinear.cu +++ b/src/turbomind/models/llama/LlamaLinear.cu @@ -169,7 +169,7 @@ struct LlamaLinear::Impl { core::Context::stream().handle()); if (ec) { - TM_LOG_ERROR("%s: %d", __PRETTY_FUNCTION__, ec); + TM_LOG_ERROR("{}: {}", __PRETTY_FUNCTION__, ec); } } diff --git a/src/turbomind/models/llama/LlamaWeight.cc b/src/turbomind/models/llama/LlamaWeight.cc index 33c66bd174..26ba9ca198 100644 --- a/src/turbomind/models/llama/LlamaWeight.cc +++ b/src/turbomind/models/llama/LlamaWeight.cc @@ -49,11 +49,11 @@ LlamaWeight::LlamaWeight(DataType data_type, { if (vocab_size_padded_ % tp_size_ != 0) { vocab_size_padded_ = (vocab_size_ + tp_size_ - 1) / tp_size_ * tp_size_; - TM_LOG_WARNING("pad vocab size from %d to %d", vocab_size_, vocab_size_padded_); + TM_LOG_WARN("pad vocab size from {} to {}", vocab_size_, vocab_size_padded_); } if (embedding_size_ % tp_size_ != 0) { embedding_size_ = (embedding_size_ + tp_size_ - 1) / tp_size_ * tp_size_; - TM_LOG_WARNING("pad embed size from %d to %d", embedding_size_, embedding_size_); + TM_LOG_WARN("pad embed size from {} to {}", embedding_size_, embedding_size_); } FT_CHECK(hidden_units_ % tp_size_ == 0); TM_CHECK_EQ(vocab_size_padded_ % tp_size_, 0); diff --git a/src/turbomind/models/llama/SequenceManager.cc b/src/turbomind/models/llama/SequenceManager.cc index e683caa494..ea2817727d 100644 --- a/src/turbomind/models/llama/SequenceManager.cc +++ b/src/turbomind/models/llama/SequenceManager.cc @@ -5,10 +5,10 @@ #include #include +#include "src/turbomind/core/logger.h" #include "src/turbomind/kernels/attention/block.h" #include "src/turbomind/models/llama/BlockManager.h" #include 
"src/turbomind/models/llama/SequenceManager.h" -#include "src/turbomind/utils/logger.h" // #include "dbg.h" @@ -82,15 +82,15 @@ SequenceManager::SequenceManager(const ModelParam& model_param, for (int slot = max_batch_size - 1; slot >= 0; --slot) { free_linear_state_slots_.push_back(slot); } - TM_LOG_INFO("[SeqMgr] linear-state slot pool initialized: %d slots", max_batch_size); + TM_LOG_INFO("[SeqMgr] linear-state slot pool initialized: {} slots", max_batch_size); const auto conv_one = pooled_conv_states_.slice(0, 1).squeeze(0); const auto recurrent_one = pooled_recurrent_states_.slice(0, 1).squeeze(0); const double mb = 1.0 / (1024.0 * 1024.0); - TM_LOG_INFO("[SeqMgr] linear-state per slot: conv %.2f MB + recurrent %.2f MB = %.2f MB", + TM_LOG_INFO("[SeqMgr] linear-state per slot: conv {:.2f} MB + recurrent {:.2f} MB = {:.2f} MB", conv_one.byte_size() * mb, recurrent_one.byte_size() * mb, (conv_one.byte_size() + recurrent_one.byte_size()) * mb); - TM_LOG_INFO("[SeqMgr] linear-state combined total: %.2f MB", + TM_LOG_INFO("[SeqMgr] linear-state combined total: {:.2f} MB", (pooled_conv_states_.byte_size() + pooled_recurrent_states_.byte_size()) * mb); } @@ -115,12 +115,12 @@ SequenceManager::SequenceManager(const ModelParam& model_param, if (num_linear_layers > 0 && block_count < 1.) { const size_t linear_bytes = pooled_conv_states_.byte_size() + pooled_recurrent_states_.byte_size(); const size_t target_bytes = static_cast(free_before * block_count); - TM_LOG_INFO("[SeqMgr] Adjusting block_count: free_before %.2f MB, linear %.2f MB, target %.2f MB", + TM_LOG_INFO("[SeqMgr] Adjusting block_count: free_before {:.2f} MB, linear {:.2f} MB, target {:.2f} MB", free_before / (1024. * 1024.), linear_bytes / (1024. * 1024.), target_bytes / (1024. * 1024.)); if (target_bytes <= linear_bytes) { - TM_LOG_ERROR("[SeqMgr] Linear-state memory (%.2f MB) >= cache budget (%.2f MB). ", + TM_LOG_ERROR("[SeqMgr] Linear-state memory ({:.2f} MB) >= cache budget ({:.2f} MB). 
", linear_bytes / (1024. * 1024.), target_bytes / (1024. * 1024.)); TM_CHECK(0) @@ -128,7 +128,7 @@ SequenceManager::SequenceManager(const ModelParam& model_param, } const size_t cache_bytes = target_bytes - linear_bytes; block_count = static_cast(cache_bytes) / static_cast(block_size); - TM_LOG_INFO("[SeqMgr] Adjusted block_count to %.0f", block_count); + TM_LOG_INFO("[SeqMgr] Adjusted block_count to {:.0f}", block_count); } block_manager_ = std::make_shared(block_size, block_count, chunk_size, allocator, get_free_size); @@ -136,7 +136,7 @@ SequenceManager::SequenceManager(const ModelParam& model_param, if (enable_prefix_caching) { block_trie_ = std::make_shared(block_config.block_len_, block_manager_); } - TM_LOG_WARNING("[SegMgr] prefix caching is %s", enable_prefix_caching ? "enabled" : "disabled"); + TM_LOG_WARN("prefix caching is {}", enable_prefix_caching ? "enabled" : "disabled"); } const Sequence* SequenceManager::Create(uint64_t id) @@ -145,13 +145,13 @@ const Sequence* SequenceManager::Create(uint64_t id) auto it = sequences_.find(id); if (it != sequences_.end()) { if (rank_ == 0) { - TM_LOG_WARNING("[SeqMgr][Create] Removing conflicting ID %llu", id); + TM_LOG_WARN("Removing conflicting ID {}", id); } Erase(it); } it = sequences_.emplace_hint(it, id, std::move(sequence)); if (rank_ == 0) { - TM_LOG_INFO("[SeqMgr][Create] ID %llu", id); + TM_LOG_INFO("ID {}", id); } return &it->second; } @@ -272,10 +272,10 @@ void SequenceManager::CachePrompt(const Sequences& sequences, int active_size) const auto& [block_ids, unique_ids] = block_trie_->Cache(seq, seq.prompt); if (rank_ == 0) { // clang-format off - TM_LOG_INFO("[SeqMgr][CachePrompt] ID %llu, cached blocks %d, tokens %d", seq.id, + TM_LOG_INFO("ID {}, cached blocks {}, tokens {}", seq.id, (int)block_ids.size(), (int)seq.prompt.size()); - TM_LOG_DEBUG("[SeqMgr][CachePrompt] ID %llu, cached block_ids %s, unique_ids %s", seq.id, - vector2string(block_ids).c_str(), vector2string(unique_ids).c_str()); + 
TM_LOG_DEBUG("ID {}, cached block_ids {}, unique_ids {}", seq.id, + vector2string(block_ids), vector2string(unique_ids)); // clang-format on } if (seq.cache_len >= seq.prompt.size()) { @@ -295,10 +295,10 @@ void SequenceManager::CacheGeneration(const Sequence& seq) if (rank_ == 0) { // clang-format off - TM_LOG_INFO("[SeqMgr][CacheGeneration] ID %llu, cached blocks %d, tokens %d", + TM_LOG_INFO("ID {}, cached blocks {}, tokens {}", seq.id, (int)block_ids.size(), (int)seq.tokens.size()); - TM_LOG_DEBUG("[SeqMgr][CacheGeneration] ID %llu, cached block_ids %s, unique_ids %s", seq.id, - vector2string(block_ids).c_str(), vector2string(unique_ids).c_str()); + TM_LOG_DEBUG("ID {}, cached block_ids {}, unique_ids {}", seq.id, + vector2string(block_ids), vector2string(unique_ids)); // clang-format on } } @@ -595,9 +595,9 @@ void SequenceManager::PrefixMatch(Sequences& sequences, const std::vector& if (rank_ == 0) { // clang-format off - TM_LOG_INFO("[SeqMgr][match] ID %llu, hit blocks %d, cache_len %d", seq.id, (int)block_ids.size(), seq.cache_len); - TM_LOG_DEBUG("[SeqMgr][match] ID %llu, hit block_ids %s, unique_ids %s", seq.id, - vector2string(block_ids).c_str(), vector2string(unique_ids).c_str()); + TM_LOG_INFO("ID {}, hit blocks {}, cache_len {}", seq.id, (int)block_ids.size(), seq.cache_len); + TM_LOG_DEBUG("ID {}, hit block_ids {}, unique_ids {}", seq.id, + vector2string(block_ids), vector2string(unique_ids)); // clang-format on } @@ -616,10 +616,10 @@ void SequenceManager::PrefixMatch(Sequences& sequences, const std::vector& if (rank_ == 0) { // clang-format off - TM_LOG_INFO("[SeqMgr][match] ID %llu, after matching, blocks %d, cache_len %d", + TM_LOG_INFO("ID {}, after matching, blocks {}, cache_len {}", seq.id, seq.blocks.size(), seq.cache_len); - TM_LOG_DEBUG("[SeqMgr][match] ID %llu, after matching, block_ids %s, unique_ids %s", seq.id, - vector2string(seq.blocks).c_str(), vector2string(seq.block_unique_ids).c_str()); + TM_LOG_DEBUG("ID {}, after matching, 
block_ids {}, unique_ids {}", seq.id, + vector2string(seq.blocks), vector2string(seq.block_unique_ids)); // clang-format on } } @@ -694,7 +694,7 @@ auto SequenceManager::Materialize(Sequences sequences, // release preempted blocks -> cached if (!schedule.victims.empty()) { - TM_LOG_INFO("[SeqMgr] #victim: %d", (int)schedule.victims.size()); + TM_LOG_WARN("#victim: {}", (int)schedule.victims.size()); for (const auto& p : schedule.victims) { UpdateAndSetUnlock(*p); } @@ -723,7 +723,7 @@ auto SequenceManager::Materialize(Sequences sequences, } } - // TM_LOG_ERROR("active: %4d, cached: %4d, free: %4d", + // TM_LOG_ERROR("active: {:4}, cached: {:4}, free: {:4}", // block_manager_->active_count(), // block_manager_->cached_count(), // block_manager_->free_count()); diff --git a/src/turbomind/models/llama/SequenceManager.h b/src/turbomind/models/llama/SequenceManager.h index 8aee1f41b3..fff2706379 100644 --- a/src/turbomind/models/llama/SequenceManager.h +++ b/src/turbomind/models/llama/SequenceManager.h @@ -3,6 +3,7 @@ #pragma once #include +#include #include #include "src/turbomind/core/allocator.h" diff --git a/src/turbomind/models/llama/llama_kernels.cu b/src/turbomind/models/llama/llama_kernels.cu index 0b70f92fd7..fb0e96b3a8 100644 --- a/src/turbomind/models/llama/llama_kernels.cu +++ b/src/turbomind/models/llama/llama_kernels.cu @@ -16,6 +16,7 @@ #include "src/turbomind/models/llama/llama_kernels.h" #include "src/turbomind/utils/cuda_utils.h" #include "src/turbomind/utils/dispatch.h" +#include "src/turbomind/utils/string_utils.h" namespace turbomind { diff --git a/src/turbomind/models/llama/unified_attention_layer.cc b/src/turbomind/models/llama/unified_attention_layer.cc index b78efe0dae..390ea72926 100644 --- a/src/turbomind/models/llama/unified_attention_layer.cc +++ b/src/turbomind/models/llama/unified_attention_layer.cc @@ -45,9 +45,9 @@ #include "src/turbomind/models/llama/mla_utils.h" #include "src/turbomind/models/llama/unified_attention_layer.h" +#include 
"src/turbomind/core/logger.h" #include "src/turbomind/utils/anomaly_handler.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" // #include "dbg.h" @@ -190,8 +190,8 @@ static void init_dynamic_ntk(RequestCache& cache, const RopeParam& rope) scaling_factor = scaling_factor * max_seq_len / max_pos_emb - (scaling_factor - 1); cache.rope_base *= powf(scaling_factor, rope.dim / (rope.dim - 2.f)); // clang-format off - TM_LOG_INFO("[ProcessInferRequests] %ld rope_scaling_factor: %f, rope_theta = %f", - (long)cache.req->id, scaling_factor, cache.rope_base); + TM_LOG_INFO("{} rope_scaling_factor: {}, rope_theta = {}", + cache.req->id, scaling_factor, cache.rope_base); // clang-format on } } @@ -299,7 +299,7 @@ void UnifiedAttentionLayer::Setup(int phase, TensorMap& env) void UnifiedAttentionLayer::Forward(ForwardParam p) { - TM_LOG_DEBUG(__PRETTY_FUNCTION__); + TM_LOG_DEBUG("{}", __PRETTY_FUNCTION__); ///////////////////////////////////////////// /// parse inputs diff --git a/src/turbomind/python/bind.cpp b/src/turbomind/python/bind.cpp index 084cc026dc..535c284535 100644 --- a/src/turbomind/python/bind.cpp +++ b/src/turbomind/python/bind.cpp @@ -257,7 +257,7 @@ static void safe_memcpy(void* dst, const void* src, size_t size) catch (...) 
{ int device_id{-1}; cudaGetDevice(&device_id); - TM_LOG_ERROR("cudaMemcpy failed: dst=(%d, %d, %p, %p), src=(%d, %d, %p, %p), size=%s, device=%d", + TM_LOG_ERROR("cudaMemcpy failed: dst=({}, {}, {}, {}), src=({}, {}, {}, {}), size={}, device={}", (int)dat.type, dat.device, dat.devicePointer, @@ -266,7 +266,7 @@ static void safe_memcpy(void* dst, const void* src, size_t size) sat.device, sat.devicePointer, sat.hostPointer, - std::to_string(size).c_str(), + size, device_id); throw; } diff --git a/src/turbomind/turbomind.cc b/src/turbomind/turbomind.cc index 5911e1d379..1529269fde 100644 --- a/src/turbomind/turbomind.cc +++ b/src/turbomind/turbomind.cc @@ -69,7 +69,7 @@ static void parse_default_rope_param(const YAML::Node& node, RopeParam& param) param.base = node["base"].as(); param.dim = node["dim"].as(); if (param.base == 0.f || param.dim == 0) { - TM_LOG_ERROR("invalid rope param: base = %f, dim = %d", param.base, param.dim); + TM_LOG_ERROR("invalid rope param: base = {}, dim = {}", param.base, param.dim); FT_CHECK(0); } } @@ -309,20 +309,19 @@ struct TurboMind::Impl { { if (!engine_param_.max_context_token_num) { engine_param_.max_context_token_num = engine_param_.session_len; - TM_LOG_WARNING("[TM] `max_context_token_num` is not set, default to %d.", - (int)engine_param_.max_context_token_num); + TM_LOG_WARN("`max_context_token_num` is not set, default to {}.", (int)engine_param_.max_context_token_num); } if (engine_param_.max_context_token_num <= engine_param_.max_batch_size) { engine_param_.max_context_token_num *= engine_param_.session_len; - TM_LOG_WARNING("[TM] `max_context_token_num` = %d.", (int)engine_param_.max_context_token_num); + TM_LOG_WARN("`max_context_token_num` = {}.", (int)engine_param_.max_context_token_num); } } }; TurboMind::Impl::~Impl() { - TM_LOG_INFO(__PRETTY_FUNCTION__); + TM_LOG_INFO("{}", __PRETTY_FUNCTION__); if (gateway_) { gateway_->shutdown(); } @@ -391,7 +390,7 @@ TurboMind::Impl::Impl(string model_dir, string config, 
FFICtxFactory ffi_ctx_fac model_param_.layer_types.push_back(0); } else { - TM_LOG_WARNING("[TM] Unknown layer_type '%s', treating as full_attention.", type_str.c_str()); + TM_LOG_WARN("Unknown layer_type '{}', treating as full_attention.", type_str); model_param_.layer_types.push_back(0); } } @@ -665,7 +664,7 @@ void TurboMind::Impl::WarmUp(int index) std::ifstream ifs(str); const int n_imported = linear.Import(ifs); if (index == 0) { - TM_LOG_INFO("[GEMM] %d records imported", n_imported); + TM_LOG_INFO("{} records imported", n_imported); } return; } @@ -698,7 +697,7 @@ void TurboMind::Impl::WarmUp(int index) } auto str = Join(bss.begin(), bss.end(), ", "); - TM_LOG_INFO("[Engine] Warm-up lengths: %s", str.c_str()); + TM_LOG_INFO("Warm-up lengths: {}", str); if (!bss.empty()) { const auto max_bs = *std::max_element(bss.begin(), bss.end()); @@ -713,7 +712,7 @@ void TurboMind::Impl::WarmUp(int index) for (auto token_num : bss) { - TM_LOG_INFO("[WarmUp] %d", token_num); + TM_LOG_INFO("{}", token_num); auto r = CreateRequest(); @@ -746,13 +745,13 @@ void TurboMind::Impl::WarmUp(int index) } if (status != Request::kFinish) { - TM_LOG_ERROR("[Engine] Warm-up for %d tokens failed with status %d", (int)token_num, (int)status); + TM_LOG_ERROR("Warm-up for {} tokens failed with status {}", (int)token_num, (int)status); } } auto tock = std::chrono::steady_clock::now(); - TM_LOG_INFO("[WarmUp] Warm-up finished in %.2f seconds.", + TM_LOG_INFO("Warm-up finished in {:.2f} seconds.", std::chrono::duration>(tock - tick).count()); } } @@ -766,7 +765,7 @@ void TurboMind::Impl::WarmUp(int index) if (auto path = std::getenv("TM_GEMM_EXPORT")) { std::ofstream ofs(path); const auto n_records = linear.Export(ofs); - TM_LOG_INFO("[GEMM] %d records exported.", n_records); + TM_LOG_INFO("{} records exported.", n_records); } gateway_->set_threshold(1); diff --git a/src/turbomind/utils/CMakeLists.txt b/src/turbomind/utils/CMakeLists.txt index c6c67a4d29..eb79fb8fdb 100644 --- 
a/src/turbomind/utils/CMakeLists.txt +++ b/src/turbomind/utils/CMakeLists.txt @@ -16,16 +16,10 @@ cmake_minimum_required(VERSION 3.11) find_package(CUDAToolkit REQUIRED) -add_library(logger STATIC logger.cc) -set_property(TARGET logger PROPERTY POSITION_INDEPENDENT_CODE ON) -set_property(TARGET logger PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) -target_link_libraries(logger PUBLIC CUDA::cudart) - - add_library(cuda_utils STATIC cuda_utils.cc) set_property(TARGET cuda_utils PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET cuda_utils PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) -target_link_libraries(cuda_utils PUBLIC logger CUDA::cudart CUDA::cuda_driver) +target_link_libraries(cuda_utils PUBLIC fmt::fmt CUDA::cudart CUDA::cuda_driver) add_library(nvtx_utils STATIC nvtx_utils.cc) @@ -40,12 +34,12 @@ endif() add_library(memory_utils STATIC memory_utils.cu) set_property(TARGET memory_utils PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET memory_utils PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) -target_link_libraries(memory_utils PUBLIC cuda_utils logger) +target_link_libraries(memory_utils PUBLIC cuda_utils) add_library(anomaly_handler STATIC anomaly_handler.cu) set_property(TARGET anomaly_handler PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET anomaly_handler PROPERTY CUDA_RESOLVE_DEVICE_SYMBOLS ON) -target_link_libraries(anomaly_handler PUBLIC cuda_utils logger) +target_link_libraries(anomaly_handler PUBLIC cuda_utils) add_library(parser STATIC parser.cc) set_property(TARGET parser PROPERTY POSITION_INDEPENDENT_CODE ON) diff --git a/src/turbomind/utils/anomaly_handler.cu b/src/turbomind/utils/anomaly_handler.cu index fd77786da8..2355437fa3 100644 --- a/src/turbomind/utils/anomaly_handler.cu +++ b/src/turbomind/utils/anomaly_handler.cu @@ -8,10 +8,10 @@ #include #include "src/turbomind/core/data_type.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/models/llama/llama_utils.h" #include "src/turbomind/utils/anomaly_handler.h" 
#include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" #include "src/turbomind/utils/memory_utils.h" namespace turbomind { @@ -138,7 +138,7 @@ struct AnomalyHandler::Impl { g_level = static_cast(*level); } - TM_LOG_WARNING("[AnomalyHandler] level: %d", g_level); + TM_LOG_WARN("level: {}", g_level); if (!g_level) { return {}; @@ -147,33 +147,33 @@ struct AnomalyHandler::Impl { const auto pos_inf = parse_float(str, "pinf="); if (pos_inf) { g_pinf_val_ = *pos_inf; - TM_LOG_WARNING("[AnomalyHandler] +INF -> %f", g_pinf_val_); + TM_LOG_WARN("+INF -> {}", g_pinf_val_); } const auto neg_inf = parse_float(str, "ninf="); if (neg_inf) { g_ninf_val_ = *neg_inf; - TM_LOG_WARNING("[AnomalyHandler] -INF -> %f", g_ninf_val_); + TM_LOG_WARN("-INF -> {}", g_ninf_val_); } if (!pos_inf && !neg_inf) { if (const auto flush_inf = parse_float(str, "inf=")) { g_pinf_val_ = *flush_inf; g_ninf_val_ = -g_pinf_val_; - TM_LOG_WARNING("[AnomalyHandler] +INF -> %f", g_pinf_val_); - TM_LOG_WARNING("[AnomalyHandler] -INF -> %f", g_ninf_val_); + TM_LOG_WARN("+INF -> {}", g_pinf_val_); + TM_LOG_WARN("-INF -> {}", g_ninf_val_); } } if (const auto nan = parse_float(str, "nan=")) { g_nan_val_ = *nan; - TM_LOG_WARNING("[AnomalyHandler] NaN -> %f", g_nan_val_); + TM_LOG_WARN("NaN -> {}", g_nan_val_); } const auto fallback = parse_float(str, "fallback="); if (fallback) { g_fallback = *fallback; - TM_LOG_WARNING("[AnomalyHandler] fallback -> %d", g_fallback); + TM_LOG_WARN("fallback -> {}", g_fallback); } return {}; @@ -197,14 +197,14 @@ struct AnomalyHandler::Impl { // When fallback is not set from env if (fallback_ == -1) { fallback_ = fallback; - TM_LOG_WARNING("[AnomalyHandler] fallback: %d", fallback_); + TM_LOG_WARN("fallback: {}", fallback_); } FT_CHECK(0 <= fallback_); FT_CHECK(fallback_ < vocab_size); - TM_LOG_WARNING("[AnomalyHandler] max_batch_size: %d", max_batch_size); - TM_LOG_WARNING("[AnomalyHandler] vocab_size: %d", vocab_size); + TM_LOG_WARN("max_batch_size: 
{}", max_batch_size); + TM_LOG_WARN("vocab_size: {}", vocab_size); } } @@ -231,11 +231,11 @@ struct AnomalyHandler::Impl { const auto& n_inf = h_count_[i * 2]; const auto& n_nan = h_count_[i * 2 + 1]; if (n_inf || n_nan) { - TM_LOG_WARNING("[AnomalyHandler][rank=%d] (%s) INF: %s, NaN: %s", + TM_LOG_WARN("rank {}: ({}) INF: {}, NaN: {}", rank_, - info_[i].c_str(), - std::to_string(n_inf).c_str(), - std::to_string(n_nan).c_str()); + info_[i], + n_inf, + n_nan); ++die; } } diff --git a/src/turbomind/utils/cuda_utils.cc b/src/turbomind/utils/cuda_utils.cc index d5aa60cd81..c26a9c3c8a 100644 --- a/src/turbomind/utils/cuda_utils.cc +++ b/src/turbomind/utils/cuda_utils.cc @@ -31,12 +31,10 @@ void syncAndCheck(const char* const file, int const line) cudaDeviceSynchronize(); cudaError_t result = cudaGetLastError(); if (result) { - TM_LOG_ERROR((std::string("CUDA runtime error: ") + (_cudaGetErrorEnum(result)) + " " + file + ":" - + std::to_string(line)) - .c_str()); + TM_LOG_FATAL("CUDA runtime error: {} {}:{}", _cudaGetErrorEnum(result), file, line); std::abort(); } - TM_LOG_DEBUG(fmtstr("run syncAndCheck at %s:%d", file, line)); + TM_LOG_DEBUG("run syncAndCheck at {}:{}", file, line); } } } diff --git a/src/turbomind/utils/cuda_utils.h b/src/turbomind/utils/cuda_utils.h index 77aa93c639..fb7b311617 100644 --- a/src/turbomind/utils/cuda_utils.h +++ b/src/turbomind/utils/cuda_utils.h @@ -32,9 +32,9 @@ #endif #include "src/turbomind/core/check.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/macro.h" #include "src/turbomind/utils/cuda_bf16_wrapper.h" -#include "src/turbomind/utils/logger.h" namespace turbomind { @@ -84,9 +84,7 @@ template void check(T result, char const* const func, const char* const file, int const line) { if (result) { - TM_LOG_ERROR((std::string("CUDA runtime error: ") + (_cudaGetErrorEnum(result)) + " " + file + ":" - + std::to_string(line)) - .c_str()); + TM_LOG_ERROR("CUDA runtime error: {} {}:{}", _cudaGetErrorEnum(result), file, 
line); std::abort(); } } @@ -103,7 +101,7 @@ void syncAndCheck(const char* const file, int const line); const char* p_str{}; \ cuGetErrorString(ec, &p_str); \ p_str = p_str ? p_str : "Unknown error"; \ - auto msg = fmtstr("[TM][ERROR] CUDA driver error: %s:%d '%s'", __FILE__, __LINE__, p_str); \ + auto msg = fmt::format("[TM][ERROR] CUDA driver error: {}:{} '{}'", __FILE__, __LINE__, p_str); \ throw std::runtime_error(msg.c_str()); \ } diff --git a/src/turbomind/utils/logger.cc b/src/turbomind/utils/logger.cc deleted file mode 100644 index f9e7bad73a..0000000000 --- a/src/turbomind/utils/logger.cc +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "src/turbomind/utils/logger.h" -#include - -namespace turbomind { - -Logger& Logger::getLogger() -{ - thread_local Logger instance; - return instance; -} - -Logger::Logger() -{ - char* is_first_rank_only_char = std::getenv("TM_LOG_FIRST_RANK_ONLY"); - bool is_first_rank_only = - (is_first_rank_only_char != nullptr && std::string(is_first_rank_only_char) == "ON") ? 
true : false; - - int device_id; - cudaGetDevice(&device_id); - - char* level_name = std::getenv("TM_LOG_LEVEL"); - if (level_name != nullptr) { - std::map name_to_level = { - {"TRACE", TRACE}, - {"DEBUG", DEBUG}, - {"INFO", INFO}, - {"WARNING", WARNING}, - {"ERROR", ERROR}, - }; - auto level = name_to_level.find(level_name); - // If TM_LOG_FIRST_RANK_ONLY=ON, set LOG LEVEL of other device to ERROR - if (is_first_rank_only && device_id != 0) { - level = name_to_level.find("ERROR"); - } - if (level != name_to_level.end()) { - setLevel(level->second); - } - else { - fprintf(stderr, - "[TM][WARNING] Invalid logger level TM_LOG_LEVEL=%s. " - "Ignore the environment variable and use a default " - "logging level.\n", - level_name); - level_name = nullptr; - } - } -} - -} // namespace turbomind diff --git a/src/turbomind/utils/logger.h b/src/turbomind/utils/logger.h deleted file mode 100644 index cc5dfce94c..0000000000 --- a/src/turbomind/utils/logger.h +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#pragma once - -#include -#include -#include - -#include "src/turbomind/utils/string_utils.h" - -namespace turbomind { - -// cub.cuh brings windows.h -// should be included after cub.cuh -#ifdef ERROR -#undef ERROR -#endif - -class Logger { - -public: - enum Level - { - TRACE = 0, - DEBUG = 10, - INFO = 20, - WARNING = 30, - ERROR = 40 - }; - - static Logger& getLogger(); - Logger(Logger const&) = delete; - void operator=(Logger const&) = delete; - - template - void log(const Level level, const std::string format, const Args&... args) - { - if (level_ <= level) { - std::string fmt = getPrefix(level) + format + "\n"; - // FILE* out = level_ < WARNING ? stdout : stderr; - std::string logstr = fmtstr(fmt, args...); - fprintf(stderr, "%s", logstr.c_str()); - } - } - - template - void log(const Level level, const int rank, const std::string format, const Args&... args) - { - if (level_ <= level) { - std::string fmt = getPrefix(level, rank) + format + "\n"; - // FILE* out = level_ < WARNING ? 
stdout : stderr; - std::string logstr = fmtstr(fmt, args...); - fprintf(stderr, "%s", logstr.c_str()); - } - } - - void setLevel(const Level level) - { - level_ = level; - log(DEBUG, "Set logger level by %s", getLevelName(level).c_str()); - } - - int getLevel() const - { - return level_; - } - -private: - const std::string PREFIX = "[TM]"; - const std::map level_name_ = { - {TRACE, "TRACE"}, {DEBUG, "DEBUG"}, {INFO, "INFO"}, {WARNING, "WARNING"}, {ERROR, "ERROR"}}; - -#ifndef NDEBUG - const Level DEFAULT_LOG_LEVEL = DEBUG; -#else - const Level DEFAULT_LOG_LEVEL = INFO; -#endif - Level level_ = DEFAULT_LOG_LEVEL; - - Logger(); - - inline const std::string getLevelName(const Level level) - { - return level_name_.at(level); - } - - inline const std::string getPrefix(const Level level) - { - return PREFIX + "[" + getLevelName(level) + "] "; - } - - inline const std::string getPrefix(const Level level, const int rank) - { - return PREFIX + "[" + getLevelName(level) + "][" + std::to_string(rank) + "] "; - } -}; - -#define TM_LOG(level, ...) \ - do { \ - if (turbomind::Logger::getLogger().getLevel() <= level) { \ - turbomind::Logger::getLogger().log(level, __VA_ARGS__); \ - } \ - } while (0) - -#define TM_LOG_TRACE(...) TM_LOG(turbomind::Logger::TRACE, __VA_ARGS__) -#define TM_LOG_DEBUG(...) TM_LOG(turbomind::Logger::DEBUG, __VA_ARGS__) -#define TM_LOG_INFO(...) TM_LOG(turbomind::Logger::INFO, __VA_ARGS__) -#define TM_LOG_WARNING(...) TM_LOG(turbomind::Logger::WARNING, __VA_ARGS__) -#define TM_LOG_ERROR(...) 
TM_LOG(turbomind::Logger::ERROR, __VA_ARGS__) -} // namespace turbomind diff --git a/tests/csrc/unittests/CMakeLists.txt b/tests/csrc/unittests/CMakeLists.txt index 454f9476f5..9609923967 100644 --- a/tests/csrc/unittests/CMakeLists.txt +++ b/tests/csrc/unittests/CMakeLists.txt @@ -46,24 +46,24 @@ target_compile_features(unittest PRIVATE cxx_std_14) target_link_libraries( # Libs for test_attention_kernels unittest PUBLIC CUDA::cudart CUDA::curand - gpt_kernels gtest memory_utils tensor unfused_attention_kernels cuda_utils logger) + gpt_kernels gtest memory_utils tensor unfused_attention_kernels cuda_utils) target_link_libraries( # Libs for test_logprob_kernels unittest PUBLIC CUDA::cudart - logprob_kernels memory_utils cuda_utils logger) + logprob_kernels memory_utils cuda_utils) target_link_libraries( # Libs for test_penalty_kernels unittest PUBLIC CUDA::cublas CUDA::cublasLt CUDA::cudart - sampling_penalty_kernels memory_utils cuda_utils logger) + sampling_penalty_kernels memory_utils cuda_utils) target_link_libraries( # Libs for test_sampling_kernel unittest PUBLIC CUDA::cudart - sampling_topk_kernels sampling_topp_kernels memory_utils tensor cuda_utils logger) + sampling_topk_kernels sampling_topp_kernels memory_utils tensor cuda_utils) target_link_libraries( # Libs for test_sampling_layer unittest PUBLIC CUDA::cublas CUDA::cublasLt CUDA::cudart cublasMMWrapper memory_utils - DynamicDecodeLayer cuda_utils logger + DynamicDecodeLayer cuda_utils ) target_link_libraries( # Libs for test_tensor - unittest PUBLIC cuda_utils logger) + unittest PUBLIC cuda_utils) diff --git a/tests/csrc/unittests/gtest_utils.h b/tests/csrc/unittests/gtest_utils.h index e84554b26d..542802d286 100644 --- a/tests/csrc/unittests/gtest_utils.h +++ b/tests/csrc/unittests/gtest_utils.h @@ -51,11 +51,11 @@ bool checkResult(std::string name, T* out, T*ref, size_t size, float atol, float bool ok = almostEqual(a, b, atol, rtol); // Print the error. 
if (!ok && failures < 4) { - TM_LOG_ERROR(">> invalid result for i=%lu:", i); - TM_LOG_ERROR(">> found......: %10.6f", a); - TM_LOG_ERROR(">> expected...: %10.6f", b); - TM_LOG_ERROR(">> error......: %.6f", fabsf(a - b)); - TM_LOG_ERROR(">> tol........: %.6f", atol + rtol * fabs(b)); + TM_LOG_ERROR(">> invalid result for i={}:", i); + TM_LOG_ERROR(">> found......: {:10.6f}", a); + TM_LOG_ERROR(">> expected...: {:10.6f}", b); + TM_LOG_ERROR(">> error......: {:.6f}", fabsf(a - b)); + TM_LOG_ERROR(">> tol........: {:.6f}", atol + rtol * fabs(b)); } // Update the number of failures. failures += ok ? 0 : 1; @@ -68,8 +68,8 @@ bool checkResult(std::string name, T* out, T*ref, size_t size, float atol, float // Allow not matched up to 1% elements. size_t tol_failures = (size_t)(0.01 * size); if (failures > tol_failures) { - TM_LOG_ERROR("%s (failures: %.2f%% atol: %.2e rtol: %.2e rel_gap: %.2e%%)", - name.c_str(), 100. * failures / size, atol, rtol, 100. * relative_gap); + TM_LOG_ERROR("{} (failures: {:.2f}% atol: {:.2e} rtol: {:.2e} rel_gap: {:.2e}%)", + name, (double)(100. * failures / size), atol, rtol, 100. 
* relative_gap); } return failures <= tol_failures; } diff --git a/tests/csrc/unittests/test_logprob_kernels.cu b/tests/csrc/unittests/test_logprob_kernels.cu index 16fd2fb327..db1fdcbe90 100644 --- a/tests/csrc/unittests/test_logprob_kernels.cu +++ b/tests/csrc/unittests/test_logprob_kernels.cu @@ -10,7 +10,7 @@ #include "src/turbomind/kernels/logprob_kernels.h" #include "src/turbomind/utils/allocator.h" #include "src/turbomind/utils/cuda_utils.h" -#include "src/turbomind/utils/logger.h" +#include "src/turbomind/core/logger.h" #include "src/turbomind/utils/memory_utils.h" #include "gtest_utils.h" diff --git a/tests/csrc/unittests/test_sampling_layer.cu b/tests/csrc/unittests/test_sampling_layer.cu index ba2164ece6..c2bbcf4f19 100644 --- a/tests/csrc/unittests/test_sampling_layer.cu +++ b/tests/csrc/unittests/test_sampling_layer.cu @@ -268,13 +268,13 @@ protected: for (auto& expt : expts) { ss << " " << expt; } - TM_LOG_DEBUG("%s", ss.str().c_str()); + TM_LOG_DEBUG("{}", ss.str()); } ++failures; } } TM_LOG_DEBUG( - "check...%6s : failures: %d / %d", failures == 0 ? "....OK" : "FAILED", failures, max_seq_len * batchxbeam); + "check...{:>6} : failures: {} / {}", failures == 0 ? 
"....OK" : "FAILED", failures, max_seq_len * batchxbeam); delete[] h_output_ids; return failures == 0; } @@ -1135,7 +1135,7 @@ protected: dynamic_decode_layer->forward(&dynamic_decode_output_tensors, &dynamic_decode_input_tensors); - TM_LOG_DEBUG("Step %2d generated ids", step); + TM_LOG_DEBUG("Step {:2d} generated ids", step); cudaD2Hcpy( h_output_ids, dynamic_decode_output_tensors.at("output_ids").getPtrWithOffset(step * (batch_size * beam_width)), @@ -1145,12 +1145,12 @@ protected: for (size_t i = 0; i < batch_size * beam_width; ++i) { int idx = i * vocab_size + h_output_ids[i]; expected_cum_log_probs[i] += (float)h_log_probs[idx]; - TM_LOG_DEBUG("| step %2d batch %2d idx %7d id %6d | log-prob %9.4f (expt: %9.4f) " - "| cum-log-prob %9.4f (expt: %9.4f) | prob %9.4e", - (int)step, - (int)i, - (int)idx, - (int)h_output_ids[i], + TM_LOG_DEBUG("| step {:2d} batch {:2d} idx {:7d} id {:6d} | log-prob {:9.4f} (expt: {:9.4f}) " + "| cum-log-prob {:9.4f} (expt: {:9.4f}) | prob {:9.4e}", + step, + i, + idx, + h_output_ids[i], h_output_log_probs[step * batch_size * beam_width + i], (float)h_log_probs[idx], h_cum_log_probs[i], diff --git a/tests/csrc/unittests/unittest_utils.h b/tests/csrc/unittests/unittest_utils.h index fe48ad160e..287636389b 100644 --- a/tests/csrc/unittests/unittest_utils.h +++ b/tests/csrc/unittests/unittest_utils.h @@ -44,7 +44,7 @@ class TestFailureError: public std::exception { explicit TestFailureError() = default; explicit TestFailureError(std::string name, std::string msg = "") { - msg_ = fmtstr("TEST FAIL [%s] %s", name.c_str(), msg.c_str()); + msg_ = fmtstr("TEST FAIL [%s] %s", name.c_str(), msg.c_str()); } const char* what() const throw() { @@ -55,7 +55,7 @@ class TestFailureError: public std::exception { #define EXPECT_TRUE(cond) \ do { \ if (!(cond)) { \ - TM_LOG_ERROR("TEST FAIL [%s]: %s at %s:%d", __func__, #cond, __FILE__, __LINE__); \ + TM_LOG_ERROR("TEST FAIL [{}]: {} at {}:{}", __func__, #cond, __FILE__, __LINE__); \ throw 
TestFailureError(__func__); \ } \ } while (false) @@ -63,7 +63,7 @@ class TestFailureError: public std::exception { #define EXPECT_FALSE(cond) \ do { \ if (cond) { \ - TM_LOG_ERROR("TEST FAIL [%s]: %s at %s:%d", __func__, #cond, __FILE__, __LINE__); \ + TM_LOG_ERROR("TEST FAIL [{}]: {} at {}:{}", __func__, #cond, __FILE__, __LINE__); \ throw TestFailureError(__func__); \ } \ } while (false) @@ -99,11 +99,11 @@ bool checkResult(std::string name, T* out, T* ref, size_t size, float atol, floa bool ok = almostEqual(a, b, atol, rtol); // Print the error. if (!ok && failures < 4) { - TM_LOG_ERROR(">> invalid result for i=%lu:", i); - TM_LOG_ERROR(">> found......: %10.6f", a); - TM_LOG_ERROR(">> expected...: %10.6f", b); - TM_LOG_ERROR(">> error......: %.6f", fabsf(a - b)); - TM_LOG_ERROR(">> tol........: %.6f", atol + rtol * fabs(b)); + TM_LOG_ERROR(">> invalid result for i={}:", i); + TM_LOG_ERROR(">> found......: {:10.6f}", a); + TM_LOG_ERROR(">> expected...: {:10.6f}", b); + TM_LOG_ERROR(">> error......: {:.6f}", fabsf(a - b)); + TM_LOG_ERROR(">> tol........: {:.6f}", atol + rtol * fabs(b)); } // Update the number of failures. failures += ok ? 0 : 1; @@ -115,10 +115,10 @@ bool checkResult(std::string name, T* out, T* ref, size_t size, float atol, floa // Allow not matched up to 1% elements. size_t tol_failures = (size_t)(0.01 * size); - TM_LOG_INFO("check...%6s : %-50s (failures: %.2f%% atol: %.2e rtol: %.2e rel_gap: %.2e%%)", + TM_LOG_INFO("check...{:>6} : {:<50} (failures: {:.2f}% atol: {:.2e} rtol: {:.2e} rel_gap: {:.2e}%)", failures <= tol_failures ? "....OK" : "FAILED", - name.c_str(), - 100. * failures / size, + name, + (double)(100. * failures / size), atol, rtol, 100. * relative_gap);