From 265a00793c4549e8b40f213303b3b46e9caa7495 Mon Sep 17 00:00:00 2001 From: Cameron Gutman Date: Thu, 6 Feb 2025 22:22:00 -0600 Subject: [PATCH] feat(nvenc): implement async encode and hang recovery (#3629) * feat(nvenc): implement async encode * fix(video): allow NVENC to complete teardown asynchronously --- src/nvenc/nvenc_base.cpp | 2 +- src/nvenc/nvenc_d3d11.cpp | 12 ++++++++++++ src/nvenc/nvenc_d3d11.h | 6 ++---- src/video.cpp | 20 +++++++++++++++++++- 4 files changed, 34 insertions(+), 6 deletions(-) diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp index b897f51ded4..bcd12ca1e77 100644 --- a/src/nvenc/nvenc_base.cpp +++ b/src/nvenc/nvenc_base.cpp @@ -528,7 +528,7 @@ namespace nvenc { NV_ENC_LOCK_BITSTREAM lock_bitstream = {min_struct_version(NV_ENC_LOCK_BITSTREAM_VER, 1, 2)}; lock_bitstream.outputBitstream = output_bitstream; - lock_bitstream.doNotWait = 0; + lock_bitstream.doNotWait = async_event_handle ? 1 : 0; if (async_event_handle && !wait_for_async_event(100)) { BOOST_LOG(error) << "NvEnc: frame " << frame_index << " encode wait timeout"; diff --git a/src/nvenc/nvenc_d3d11.cpp b/src/nvenc/nvenc_d3d11.cpp index 28d31371329..74670acdb5e 100644 --- a/src/nvenc/nvenc_d3d11.cpp +++ b/src/nvenc/nvenc_d3d11.cpp @@ -10,11 +10,19 @@ namespace nvenc { + nvenc_d3d11::nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type): + nvenc_base(device_type) { + async_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL); + } + nvenc_d3d11::~nvenc_d3d11() { if (dll) { FreeLibrary(dll); dll = NULL; } + if (async_event_handle) { + CloseHandle(async_event_handle); + } } bool nvenc_d3d11::init_library() { @@ -53,5 +61,9 @@ namespace nvenc { return false; } + bool nvenc_d3d11::wait_for_async_event(uint32_t timeout_ms) { + return WaitForSingleObject(async_event_handle, timeout_ms) == WAIT_OBJECT_0; + } + } // namespace nvenc #endif diff --git a/src/nvenc/nvenc_d3d11.h b/src/nvenc/nvenc_d3d11.h index 29af18184de..efacb607f65 100644 --- a/src/nvenc/nvenc_d3d11.h +++ 
b/src/nvenc/nvenc_d3d11.h @@ -25,10 +25,7 @@ namespace nvenc { */ class nvenc_d3d11: public nvenc_base { public: - explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type): - nvenc_base(device_type) { - } - + explicit nvenc_d3d11(NV_ENC_DEVICE_TYPE device_type); ~nvenc_d3d11(); /** @@ -39,6 +36,7 @@ namespace nvenc { protected: bool init_library() override; + bool wait_for_async_event(uint32_t timeout_ms) override; private: HMODULE dll = NULL; diff --git a/src/video.cpp b/src/video.cpp index 000a3c7e826..f06e2360332 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -299,6 +299,7 @@ namespace video { REF_FRAMES_INVALIDATION = 1 << 8, ///< Support reference frames invalidation ALWAYS_REPROBE = 1 << 9, ///< This is an encoder of last resort and we want to aggressively probe for a better one YUV444_SUPPORT = 1 << 10, ///< Encoder may support 4:4:4 chroma sampling depending on hardware + ASYNC_TEARDOWN = 1 << 11, ///< Encoder supports async teardown on a different thread }; class avcodec_encode_session_t: public encode_session_t { @@ -503,7 +504,7 @@ namespace video { {}, // Fallback options "h264_nvenc"s, }, - PARALLEL_ENCODING | REF_FRAMES_INVALIDATION | YUV444_SUPPORT // flags + PARALLEL_ENCODING | REF_FRAMES_INVALIDATION | YUV444_SUPPORT | ASYNC_TEARDOWN // flags }; #elif !defined(__APPLE__) encoder_t nvenc { @@ -1856,6 +1857,23 @@ namespace video { return; } + // As a workaround for NVENC hangs and to generally speed up encoder reinit, + // we will complete the encoder teardown in a separate thread if supported. + // This will move expensive processing off the encoder thread to allow us + // to restart encoding as soon as possible. If an NVENC driver + // hang occurs, this thread may never exit, but it will allow + // streaming to continue without requiring a full restart of Sunshine. 
+ auto fail_guard = util::fail_guard([&encoder, &session] { + if (encoder.flags & ASYNC_TEARDOWN) { + std::thread encoder_teardown_thread {[session = std::move(session)]() mutable { + BOOST_LOG(info) << "Starting async encoder teardown"; + session.reset(); + BOOST_LOG(info) << "Async encoder teardown complete"; + }}; + encoder_teardown_thread.detach(); + } + }); + // set minimum frame time, avoiding violation of client-requested target framerate auto minimum_frame_time = std::chrono::milliseconds(1000 / std::min(config.framerate, (config::video.min_fps_factor * 10))); BOOST_LOG(debug) << "Minimum frame time set to "sv << minimum_frame_time.count() << "ms, based on min fps factor of "sv << config::video.min_fps_factor << "."sv;