From 11b4addd007f4d057c6cbc48f225f9de8a9652da Mon Sep 17 00:00:00 2001 From: David Rosca Date: Sun, 28 Aug 2022 10:15:16 +0200 Subject: [PATCH 1/5] obs-ffmpeg: Make AMF encoder work on Linux Only the fallback encoders are available (no texture support). Requires AMD proprietary Vulkan driver, using different driver will be detected on startup and the encoders disabled. --- plugins/obs-ffmpeg/CMakeLists.txt | 4 +- plugins/obs-ffmpeg/cmake/legacy.cmake | 3 +- .../obs-ffmpeg/obs-amf-test/CMakeLists.txt | 10 +- .../obs-amf-test/obs-amf-test-linux.cpp | 140 ++++++++++++++++++ plugins/obs-ffmpeg/obs-ffmpeg.c | 10 +- plugins/obs-ffmpeg/texture-amf-opts.hpp | 2 +- plugins/obs-ffmpeg/texture-amf.cpp | 114 +++++++++++--- 7 files changed, 259 insertions(+), 24 deletions(-) create mode 100644 plugins/obs-ffmpeg/obs-amf-test/obs-amf-test-linux.cpp diff --git a/plugins/obs-ffmpeg/CMakeLists.txt b/plugins/obs-ffmpeg/CMakeLists.txt index 3eba00932..778d93ffb 100644 --- a/plugins/obs-ffmpeg/CMakeLists.txt +++ b/plugins/obs-ffmpeg/CMakeLists.txt @@ -108,10 +108,12 @@ if(OS_WINDOWS) jim-nvenc-ver.h obs-ffmpeg.rc) elseif(OS_LINUX OR OS_FREEBSD) + add_subdirectory(obs-amf-test) + find_package(Libva REQUIRED) find_package(Libpci REQUIRED) - target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h) + target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h texture-amf.cpp) target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm Libpci::pci) endif() diff --git a/plugins/obs-ffmpeg/cmake/legacy.cmake b/plugins/obs-ffmpeg/cmake/legacy.cmake index 5540676ea..78b8c30a1 100644 --- a/plugins/obs-ffmpeg/cmake/legacy.cmake +++ b/plugins/obs-ffmpeg/cmake/legacy.cmake @@ -106,9 +106,10 @@ if(OS_WINDOWS) obs-ffmpeg.rc) elseif(OS_POSIX AND NOT OS_MACOS) + add_subdirectory(obs-amf-test) find_package(Libva REQUIRED) find_package(Libpci REQUIRED) - target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h) + 
target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h texture-amf.cpp) target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm LIBPCI::LIBPCI) endif() diff --git a/plugins/obs-ffmpeg/obs-amf-test/CMakeLists.txt b/plugins/obs-ffmpeg/obs-amf-test/CMakeLists.txt index e00cef1cf..07cf1e0fc 100644 --- a/plugins/obs-ffmpeg/obs-amf-test/CMakeLists.txt +++ b/plugins/obs-ffmpeg/obs-amf-test/CMakeLists.txt @@ -6,8 +6,14 @@ find_package(AMF 1.4.29 REQUIRED) target_include_directories(obs-amf-test PRIVATE ${CMAKE_SOURCE_DIR}/libobs) -target_sources(obs-amf-test PRIVATE obs-amf-test.cpp) -target_link_libraries(obs-amf-test d3d11 dxgi dxguid AMF::AMF) +if(OS_WINDOWS) + target_sources(obs-amf-test PRIVATE obs-amf-test.cpp) + target_link_libraries(obs-amf-test d3d11 dxgi dxguid AMF::AMF) +elseif(OS_POSIX AND NOT OS_MACOS) + find_package(Vulkan REQUIRED) + target_sources(obs-amf-test PRIVATE obs-amf-test-linux.cpp) + target_link_libraries(obs-amf-test dl Vulkan::Vulkan AMF::AMF) +endif() set_target_properties(obs-amf-test PROPERTIES FOLDER "plugins/obs-ffmpeg") diff --git a/plugins/obs-ffmpeg/obs-amf-test/obs-amf-test-linux.cpp b/plugins/obs-ffmpeg/obs-amf-test/obs-amf-test-linux.cpp new file mode 100644 index 000000000..db437d851 --- /dev/null +++ b/plugins/obs-ffmpeg/obs-amf-test/obs-amf-test-linux.cpp @@ -0,0 +1,140 @@ +#include +#include +#include +#include +#include + +#include +#include + +#include +#include + +using namespace amf; + +struct adapter_caps { + bool is_amd = false; + bool supports_avc = false; + bool supports_hevc = false; + bool supports_av1 = false; +}; + +static AMFFactory *amf_factory = nullptr; +static std::map adapter_info; + +static bool has_encoder(AMFContextPtr &amf_context, const wchar_t *encoder_name) +{ + AMFComponentPtr encoder; + AMF_RESULT res = amf_factory->CreateComponent(amf_context, encoder_name, + &encoder); + return res == AMF_OK; +} + +static bool get_adapter_caps(uint32_t adapter_idx) +{ + if (adapter_idx) 
+ return false; + + adapter_caps &caps = adapter_info[adapter_idx]; + + AMF_RESULT res; + AMFContextPtr amf_context; + res = amf_factory->CreateContext(&amf_context); + if (res != AMF_OK) + return true; + + AMFContext1 *context1 = NULL; + res = amf_context->QueryInterface(AMFContext1::IID(), + (void **)&context1); + if (res != AMF_OK) + return false; + res = context1->InitVulkan(nullptr); + context1->Release(); + if (res != AMF_OK) + return false; + + caps.is_amd = true; + caps.supports_avc = has_encoder(amf_context, AMFVideoEncoderVCE_AVC); + caps.supports_hevc = has_encoder(amf_context, AMFVideoEncoder_HEVC); + caps.supports_av1 = has_encoder(amf_context, AMFVideoEncoder_AV1); + + return true; +} + +int main(void) +try { + AMF_RESULT res; + VkResult vkres; + + VkApplicationInfo app_info = {}; + app_info.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + app_info.pApplicationName = "obs-amf-test"; + app_info.apiVersion = VK_API_VERSION_1_2; + + VkInstanceCreateInfo info = {}; + info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + info.pApplicationInfo = &app_info; + + VkInstance instance; + vkres = vkCreateInstance(&info, nullptr, &instance); + if (vkres != VK_SUCCESS) + throw "Failed to initialize Vulkan"; + + uint32_t device_count; + vkres = vkEnumeratePhysicalDevices(instance, &device_count, nullptr); + if (vkres != VK_SUCCESS || !device_count) + throw "Failed to enumerate Vulkan devices"; + + VkPhysicalDevice *devices = new VkPhysicalDevice[device_count]; + vkres = vkEnumeratePhysicalDevices(instance, &device_count, devices); + if (vkres != VK_SUCCESS) + throw "Failed to enumerate Vulkan devices"; + + VkPhysicalDeviceDriverProperties driver_props = {}; + driver_props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + VkPhysicalDeviceProperties2 device_props = {}; + device_props.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + device_props.pNext = &driver_props; + vkGetPhysicalDeviceProperties2(devices[0], &device_props); + + if 
(strcmp(driver_props.driverName, "AMD proprietary driver")) + throw "Not running AMD proprietary driver"; + + vkDestroyInstance(instance, nullptr); + + /* --------------------------------------------------------- */ + /* try initializing amf, I guess */ + + void *amf_module = dlopen(AMF_DLL_NAMEA, RTLD_LAZY); + if (!amf_module) + throw "Failed to load AMF lib"; + + auto init = (AMFInit_Fn)dlsym(amf_module, AMF_INIT_FUNCTION_NAME); + if (!init) + throw "Failed to get init func"; + + res = init(AMF_FULL_VERSION, &amf_factory); + if (res != AMF_OK) + throw "AMFInit failed"; + + uint32_t idx = 0; + while (get_adapter_caps(idx++)) + ; + + for (auto &[idx, caps] : adapter_info) { + printf("[%u]\n", idx); + printf("is_amd=%s\n", caps.is_amd ? "true" : "false"); + printf("supports_avc=%s\n", + caps.supports_avc ? "true" : "false"); + printf("supports_hevc=%s\n", + caps.supports_hevc ? "true" : "false"); + printf("supports_av1=%s\n", + caps.supports_av1 ? "true" : "false"); + } + + return 0; +} catch (const char *text) { + printf("[error]\nstring=%s\n", text); + return 0; +} diff --git a/plugins/obs-ffmpeg/obs-ffmpeg.c b/plugins/obs-ffmpeg/obs-ffmpeg.c index da0b2c2b4..92421c6f3 100644 --- a/plugins/obs-ffmpeg/obs-ffmpeg.c +++ b/plugins/obs-ffmpeg/obs-ffmpeg.c @@ -360,6 +360,9 @@ static bool hevc_vaapi_supported(void) #ifdef _WIN32 extern void jim_nvenc_load(bool h264, bool hevc, bool av1); extern void jim_nvenc_unload(void); +#endif + +#if defined(_WIN32) || defined(__linux__) extern void amf_load(void); extern void amf_unload(void); #endif @@ -434,7 +437,7 @@ bool obs_module_load(void) #endif } -#ifdef _WIN32 +#if defined(_WIN32) || defined(__linux__) amf_load(); #endif @@ -475,8 +478,11 @@ void obs_module_unload(void) obs_ffmpeg_unload_logging(); #endif -#ifdef _WIN32 +#if defined(_WIN32) || defined(__linux__) amf_unload(); +#endif + +#ifdef _WIN32 jim_nvenc_unload(); #endif } diff --git a/plugins/obs-ffmpeg/texture-amf-opts.hpp b/plugins/obs-ffmpeg/texture-amf-opts.hpp 
index b1c37d200..d28e3f77e 100644 --- a/plugins/obs-ffmpeg/texture-amf-opts.hpp +++ b/plugins/obs-ffmpeg/texture-amf-opts.hpp @@ -321,7 +321,7 @@ static void amf_apply_opt(amf_base *enc, obs_option *opt) val = atoi(opt->value); } - os_utf8_to_wcs(opt->name, 0, wname, _countof(wname)); + os_utf8_to_wcs(opt->name, 0, wname, amf_countof(wname)); if (is_bool) { bool bool_val = (bool)val; set_amf_property(enc, wname, bool_val); diff --git a/plugins/obs-ffmpeg/texture-amf.cpp b/plugins/obs-ffmpeg/texture-amf.cpp index 88914a027..fe651f0e1 100644 --- a/plugins/obs-ffmpeg/texture-amf.cpp +++ b/plugins/obs-ffmpeg/texture-amf.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -18,6 +19,7 @@ #include #include +#ifdef _WIN32 #include #include #include @@ -25,6 +27,8 @@ #include #include #include +#endif + #include #include #include @@ -55,8 +59,10 @@ struct amf_error { struct handle_tex { uint32_t handle; +#ifdef _WIN32 ComPtr tex; ComPtr km; +#endif }; struct adapter_caps { @@ -72,7 +78,7 @@ static std::map caps; static bool h264_supported = false; static AMFFactory *amf_factory = nullptr; static AMFTrace *amf_trace = nullptr; -static HMODULE amf_module = nullptr; +static void *amf_module = nullptr; static uint64_t amf_version = 0; /* ========================================================================= */ @@ -120,9 +126,11 @@ struct amf_base { virtual void init() = 0; }; -using d3dtex_t = ComPtr; using buf_t = std::vector; +#ifdef _WIN32 +using d3dtex_t = ComPtr; + struct amf_texencode : amf_base, public AMFSurfaceObserver { volatile bool destroying = false; @@ -159,6 +167,7 @@ struct amf_texencode : amf_base, public AMFSurfaceObserver { throw amf_error("InitDX11 failed", res); } }; +#endif struct amf_fallback : amf_base, public AMFSurfaceObserver { volatile bool destroying = false; @@ -186,9 +195,21 @@ struct amf_fallback : amf_base, public AMFSurfaceObserver { void init() override { +#if defined(_WIN32) AMF_RESULT res = 
amf_context->InitDX11(nullptr, AMF_DX11_1); if (res != AMF_OK) throw amf_error("InitDX11 failed", res); +#elif defined(__linux__) + AMFContext1 *context1 = NULL; + AMF_RESULT res = amf_context->QueryInterface( + AMFContext1::IID(), (void **)&context1); + if (res != AMF_OK) + throw amf_error("CreateContext1 failed", res); + res = context1->InitVulkan(nullptr); + context1->Release(); + if (res != AMF_OK) + throw amf_error("InitVulkan failed", res); +#endif } }; @@ -230,13 +251,18 @@ static void set_amf_property(amf_base *enc, const wchar_t *name, const T &value) : (enc->codec == amf_codec_type::HEVC) \ ? AMF_VIDEO_ENCODER_HEVC_##name \ : AMF_VIDEO_ENCODER_AV1_##name) +#define get_opt_name_enum(name) \ + ((enc->codec == amf_codec_type::AVC) ? (int)AMF_VIDEO_ENCODER_##name \ + : (enc->codec == amf_codec_type::HEVC) \ + ? (int)AMF_VIDEO_ENCODER_HEVC_##name \ + : (int)AMF_VIDEO_ENCODER_AV1_##name) #define set_opt(name, value) set_amf_property(enc, get_opt_name(name), value) #define get_opt(name, value) get_amf_property(enc, get_opt_name(name), value) #define set_avc_opt(name, value) set_avc_property(enc, name, value) #define set_hevc_opt(name, value) set_hevc_property(enc, name, value) #define set_av1_opt(name, value) set_av1_property(enc, name, value) #define set_enum_opt(name, value) \ - set_amf_property(enc, get_opt_name(name), get_opt_name(name##_##value)) + set_amf_property(enc, get_opt_name(name), get_opt_name_enum(name##_##value)) #define set_avc_enum(name, value) \ set_avc_property(enc, name, AMF_VIDEO_ENCODER_##name##_##value) #define set_hevc_enum(name, value) \ @@ -247,6 +273,7 @@ static void set_amf_property(amf_base *enc, const wchar_t *name, const T &value) /* ------------------------------------------------------------------------- */ /* Implementation */ +#ifdef _WIN32 static HMODULE get_lib(const char *lib) { HMODULE mod = GetModuleHandleA(lib); @@ -393,6 +420,7 @@ static void get_tex_from_handle(amf_texencode *enc, uint32_t handle, *km_out = 
km.Detach(); *tex_out = tex.Detach(); } +#endif static constexpr amf_int64 macroblock_size = 16; @@ -504,7 +532,7 @@ static void convert_to_encoder_packet(amf_base *enc, AMFDataPtr &data, enc->packet_data = AMFBufferPtr(data); data->GetProperty(L"PTS", &packet->pts); - const wchar_t *get_output_type; + const wchar_t *get_output_type = NULL; switch (enc->codec) { case amf_codec_type::AVC: get_output_type = AMF_VIDEO_ENCODER_OUTPUT_DATA_TYPE; @@ -638,6 +666,7 @@ static void amf_encode_base(amf_base *enc, AMFSurface *amf_surf, static bool amf_encode_tex(void *data, uint32_t handle, int64_t pts, uint64_t lock_key, uint64_t *next_key, encoder_packet *packet, bool *received_packet) +#ifdef _WIN32 try { amf_texencode *enc = (amf_texencode *)data; ID3D11DeviceContext *context = enc->context; @@ -714,6 +743,18 @@ try { *received_packet = false; return false; } +#else +{ + UNUSED_PARAMETER(data); + UNUSED_PARAMETER(handle); + UNUSED_PARAMETER(pts); + UNUSED_PARAMETER(lock_key); + UNUSED_PARAMETER(next_key); + UNUSED_PARAMETER(packet); + UNUSED_PARAMETER(received_packet); + return false; +} +#endif static buf_t alloc_buf(amf_fallback *enc) { @@ -1177,6 +1218,7 @@ static const char *amf_avc_get_name(void *) static inline int get_avc_preset(amf_base *enc, const char *preset) { + UNUSED_PARAMETER(enc); if (astrcmpi(preset, "quality") == 0) return AMF_VIDEO_ENCODER_QUALITY_PRESET_QUALITY; else if (astrcmpi(preset, "speed") == 0) @@ -1287,7 +1329,7 @@ static bool amf_avc_init(void *data, obs_data_t *settings) set_avc_property(enc, B_PIC_PATTERN, bf); } else if (bf != 0) { - warn("B-Frames set to %lld but b-frames are not " + warn("B-Frames set to %" PRId64 " but b-frames are not " "supported by this device", bf); bf = 0; @@ -1332,12 +1374,12 @@ static bool amf_avc_init(void *data, obs_data_t *settings) info("settings:\n" "\trate_control: %s\n" - "\tbitrate: %d\n" - "\tcqp: %d\n" + "\tbitrate: %" PRId64 "\n" + "\tcqp: %" PRId64 "\n" "\tkeyint: %d\n" "\tpreset: %s\n" "\tprofile: 
%s\n" - "\tb-frames: %d\n" + "\tb-frames: %" PRId64 "\n" "\twidth: %d\n" "\theight: %d\n" "\tparams: %s", @@ -1407,6 +1449,7 @@ static void amf_avc_create_internal(amf_base *enc, obs_data_t *settings) static void *amf_avc_create_texencode(obs_data_t *settings, obs_encoder_t *encoder) +#ifdef _WIN32 try { check_texture_encode_capability(encoder, amf_codec_type::AVC); @@ -1429,6 +1472,12 @@ try { blog(LOG_ERROR, "[texture-amf-h264] %s: %s", __FUNCTION__, err); return obs_encoder_create_rerouted(encoder, "h264_fallback_amf"); } +#else +{ + UNUSED_PARAMETER(settings); + return obs_encoder_create_rerouted(encoder, "h264_fallback_amf"); +} +#endif static void *amf_avc_create_fallback(obs_data_t *settings, obs_encoder_t *encoder) @@ -1514,6 +1563,7 @@ static const char *amf_hevc_get_name(void *) static inline int get_hevc_preset(amf_base *enc, const char *preset) { + UNUSED_PARAMETER(enc); if (astrcmpi(preset, "balanced") == 0) return AMF_VIDEO_ENCODER_HEVC_QUALITY_PRESET_BALANCED; else if (astrcmpi(preset, "speed") == 0) @@ -1633,8 +1683,8 @@ static bool amf_hevc_init(void *data, obs_data_t *settings) info("settings:\n" "\trate_control: %s\n" - "\tbitrate: %d\n" - "\tcqp: %d\n" + "\tbitrate: %" PRId64 "\n" + "\tcqp: %" PRId64 "\n" "\tkeyint: %d\n" "\tpreset: %s\n" "\tprofile: %s\n" @@ -1751,6 +1801,7 @@ static void amf_hevc_create_internal(amf_base *enc, obs_data_t *settings) static void *amf_hevc_create_texencode(obs_data_t *settings, obs_encoder_t *encoder) +#ifdef _WIN32 try { check_texture_encode_capability(encoder, amf_codec_type::HEVC); @@ -1773,6 +1824,12 @@ try { blog(LOG_ERROR, "[texture-amf-h265] %s: %s", __FUNCTION__, err); return obs_encoder_create_rerouted(encoder, "h265_fallback_amf"); } +#else +{ + UNUSED_PARAMETER(settings); + return obs_encoder_create_rerouted(encoder, "h265_fallback_amf"); +} +#endif static void *amf_hevc_create_fallback(obs_data_t *settings, obs_encoder_t *encoder) @@ -1854,6 +1911,7 @@ static const char *amf_av1_get_name(void *) 
static inline int get_av1_preset(amf_base *enc, const char *preset) { + UNUSED_PARAMETER(enc); if (astrcmpi(preset, "highquality") == 0) return AMF_VIDEO_ENCODER_AV1_QUALITY_PRESET_HIGH_QUALITY; else if (astrcmpi(preset, "quality") == 0) @@ -1987,8 +2045,8 @@ static bool amf_av1_init(void *data, obs_data_t *settings) info("settings:\n" "\trate_control: %s\n" - "\tbitrate: %d\n" - "\tcqp: %d\n" + "\tbitrate: %" PRId64 "\n" + "\tcqp: %" PRId64 "\n" "\tkeyint: %d\n" "\tpreset: %s\n" "\tprofile: %s\n" @@ -2052,6 +2110,7 @@ static void amf_av1_create_internal(amf_base *enc, obs_data_t *settings) static void *amf_av1_create_texencode(obs_data_t *settings, obs_encoder_t *encoder) +#ifdef _WIN32 try { check_texture_encode_capability(encoder, amf_codec_type::AV1); @@ -2074,6 +2133,12 @@ try { blog(LOG_ERROR, "[texture-amf-av1] %s: %s", __FUNCTION__, err); return obs_encoder_create_rerouted(encoder, "av1_fallback_amf"); } +#else +{ + UNUSED_PARAMETER(settings); + return obs_encoder_create_rerouted(encoder, "av1_fallback_amf"); +} +#endif static void *amf_av1_create_fallback(obs_data_t *settings, obs_encoder_t *encoder) @@ -2164,9 +2229,16 @@ static bool enum_luids(void *param, uint32_t idx, uint64_t luid) return true; } +#ifdef _WIN32 +#define OBS_AMF_TEST "obs-amf-test.exe" +#else +#define OBS_AMF_TEST "obs-amf-test" +#endif + extern "C" void amf_load(void) try { AMF_RESULT res; +#ifdef _WIN32 HMODULE amf_module_test; /* Check if the DLL is present before running the more expensive */ @@ -2176,16 +2248,24 @@ try { if (!amf_module_test) throw "No AMF library"; FreeLibrary(amf_module_test); +#else + void *amf_module_test = os_dlopen(AMF_DLL_NAMEA); + if (!amf_module_test) + throw "No AMF library"; + os_dlclose(amf_module_test); +#endif /* ----------------------------------- */ /* Check for supported codecs */ - BPtr test_exe = os_get_executable_path_ptr("obs-amf-test.exe"); + BPtr test_exe = os_get_executable_path_ptr(OBS_AMF_TEST); std::stringstream cmd; std::string 
caps_str; cmd << test_exe; +#ifdef _WIN32 enum_graphics_device_luids(enum_luids, &cmd); +#endif os_process_pipe_t *pp = os_process_pipe_create(cmd.str().c_str(), "r"); if (!pp) @@ -2245,12 +2325,12 @@ try { /* ----------------------------------- */ /* Init AMF */ - amf_module = LoadLibraryW(AMF_DLL_NAME); + amf_module = os_dlopen(AMF_DLL_NAMEA); if (!amf_module) throw "AMF library failed to load"; AMFInit_Fn init = - (AMFInit_Fn)GetProcAddress(amf_module, AMF_INIT_FUNCTION_NAME); + (AMFInit_Fn)os_dlsym(amf_module, AMF_INIT_FUNCTION_NAME); if (!init) throw "Failed to get AMFInit address"; @@ -2262,7 +2342,7 @@ try { if (res != AMF_OK) throw amf_error("GetTrace failed", res); - AMFQueryVersion_Fn get_ver = (AMFQueryVersion_Fn)GetProcAddress( + AMFQueryVersion_Fn get_ver = (AMFQueryVersion_Fn)os_dlsym( amf_module, AMF_QUERY_VERSION_FUNCTION_NAME); if (!get_ver) throw "Failed to get AMFQueryVersion address"; @@ -2301,7 +2381,7 @@ try { } catch (const amf_error &err) { /* doing an error here because it means at least the library has loaded * successfully, so they probably have AMD at this point */ - blog(LOG_ERROR, "%s: %s: 0x%lX", __FUNCTION__, err.str, + blog(LOG_ERROR, "%s: %s: 0x%X", __FUNCTION__, err.str, (uint32_t)err.res); } -- 2.40.0 From 33d3c849e8e68f0d479548640202d2e8e7041396 Mon Sep 17 00:00:00 2001 From: Kurt Kartaltepe Date: Mon, 6 Feb 2023 21:13:59 -0800 Subject: [PATCH 2/5] libobs-opengl: Enable imported dmabufs for rendering For now just tag all imported images with GS_RENDER, this may not work for some images that were produced by some hardware other than the gpu render engines. But since we dont import vaapi decoded frames we probably wont run into this. And we need this to render into vaapi frames destined for encoding. 
--- libobs-opengl/gl-egl-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libobs-opengl/gl-egl-common.c b/libobs-opengl/gl-egl-common.c index f06cd1901..e53f5a275 100644 --- a/libobs-opengl/gl-egl-common.c +++ b/libobs-opengl/gl-egl-common.c @@ -186,7 +186,7 @@ struct gs_texture *gl_egl_create_texture_from_eglimage( struct gs_texture *texture = NULL; texture = gs_texture_create(width, height, color_format, 1, NULL, - GS_GL_DUMMYTEX); + GS_GL_DUMMYTEX | GS_RENDER_TARGET); const GLuint gltex = *(GLuint *)gs_texture_get_obj(texture); gl_bind_texture(GL_TEXTURE_2D, gltex); -- 2.40.0 From de0dd29322328c003944905c89b7da6401159fb5 Mon Sep 17 00:00:00 2001 From: Kurt Kartaltepe Date: Wed, 29 Mar 2023 10:20:22 +0200 Subject: [PATCH 3/5] libobs,libobs-opengl: enable gpu encoding for opengl Enable all of the previously windows only paths for opengl backends that support encode_texture2 --- libobs-opengl/gl-subsystem.c | 12 +++ libobs/graphics/graphics-imports.c | 4 +- libobs/graphics/graphics-internal.h | 20 ++-- libobs/graphics/graphics.c | 156 ++++++++++++++-------------- libobs/graphics/graphics.h | 13 ++- libobs/obs-encoder.h | 3 + libobs/obs-internal.h | 2 +- libobs/obs-video-gpu-encode.c | 23 ++-- libobs/obs-video.c | 22 ++-- libobs/obs.c | 6 -- 10 files changed, 135 insertions(+), 126 deletions(-) diff --git a/libobs-opengl/gl-subsystem.c b/libobs-opengl/gl-subsystem.c index 0cdd46d08..8499af20e 100644 --- a/libobs-opengl/gl-subsystem.c +++ b/libobs-opengl/gl-subsystem.c @@ -1519,6 +1519,18 @@ void gs_swapchain_destroy(gs_swapchain_t *swapchain) bfree(swapchain); } +bool device_nv12_available(gs_device_t *device) +{ + UNUSED_PARAMETER(device); + return true; // always a split R8,R8G8 texture. +} + +bool device_p010_available(gs_device_t *device) +{ + UNUSED_PARAMETER(device); + return true; // always a split R16,R16G16 texture. 
+} + uint32_t gs_voltexture_get_width(const gs_texture_t *voltex) { /* TODO */ diff --git a/libobs/graphics/graphics-imports.c b/libobs/graphics/graphics-imports.c index d6eaccccc..8de5bebb6 100644 --- a/libobs/graphics/graphics-imports.c +++ b/libobs/graphics/graphics-imports.c @@ -195,6 +195,8 @@ bool load_graphics_imports(struct gs_exports *exports, void *module, GRAPHICS_IMPORT_OPTIONAL(device_nv12_available); GRAPHICS_IMPORT_OPTIONAL(device_p010_available); + GRAPHICS_IMPORT_OPTIONAL(device_texture_create_nv12); + GRAPHICS_IMPORT_OPTIONAL(device_texture_create_p010); GRAPHICS_IMPORT(device_is_monitor_hdr); @@ -230,8 +232,6 @@ bool load_graphics_imports(struct gs_exports *exports, void *module, GRAPHICS_IMPORT_OPTIONAL(device_texture_wrap_obj); GRAPHICS_IMPORT_OPTIONAL(device_texture_acquire_sync); GRAPHICS_IMPORT_OPTIONAL(device_texture_release_sync); - GRAPHICS_IMPORT_OPTIONAL(device_texture_create_nv12); - GRAPHICS_IMPORT_OPTIONAL(device_texture_create_p010); GRAPHICS_IMPORT_OPTIONAL(device_stagesurface_create_nv12); GRAPHICS_IMPORT_OPTIONAL(device_stagesurface_create_p010); GRAPHICS_IMPORT_OPTIONAL(device_register_loss_callbacks); diff --git a/libobs/graphics/graphics-internal.h b/libobs/graphics/graphics-internal.h index d0ae5b895..268ad36da 100644 --- a/libobs/graphics/graphics-internal.h +++ b/libobs/graphics/graphics-internal.h @@ -273,6 +273,16 @@ struct gs_exports { bool (*device_nv12_available)(gs_device_t *device); bool (*device_p010_available)(gs_device_t *device); + bool (*device_texture_create_nv12)(gs_device_t *device, + gs_texture_t **tex_y, + gs_texture_t **tex_uv, + uint32_t width, uint32_t height, + uint32_t flags); + bool (*device_texture_create_p010)(gs_device_t *device, + gs_texture_t **tex_y, + gs_texture_t **tex_uv, + uint32_t width, uint32_t height, + uint32_t flags); bool (*device_is_monitor_hdr)(gs_device_t *device, void *monitor); @@ -330,16 +340,6 @@ struct gs_exports { int (*device_texture_acquire_sync)(gs_texture_t *tex, uint64_t 
key, uint32_t ms); int (*device_texture_release_sync)(gs_texture_t *tex, uint64_t key); - bool (*device_texture_create_nv12)(gs_device_t *device, - gs_texture_t **tex_y, - gs_texture_t **tex_uv, - uint32_t width, uint32_t height, - uint32_t flags); - bool (*device_texture_create_p010)(gs_device_t *device, - gs_texture_t **tex_y, - gs_texture_t **tex_uv, - uint32_t width, uint32_t height, - uint32_t flags); gs_stagesurf_t *(*device_stagesurface_create_nv12)(gs_device_t *device, uint32_t width, diff --git a/libobs/graphics/graphics.c b/libobs/graphics/graphics.c index 288fb1db6..78a7fc27b 100644 --- a/libobs/graphics/graphics.c +++ b/libobs/graphics/graphics.c @@ -2908,6 +2908,84 @@ void gs_debug_marker_end(void) thread_graphics->device); } +bool gs_texture_create_nv12(gs_texture_t **tex_y, gs_texture_t **tex_uv, + uint32_t width, uint32_t height, uint32_t flags) +{ + graphics_t *graphics = thread_graphics; + bool success = false; + + if (!gs_valid("gs_texture_create_nv12")) + return false; + + if ((width & 1) == 1 || (height & 1) == 1) { + blog(LOG_ERROR, "NV12 textures must have dimensions " + "divisible by 2."); + return false; + } + + if (graphics->exports.device_texture_create_nv12) { + success = graphics->exports.device_texture_create_nv12( + graphics->device, tex_y, tex_uv, width, height, flags); + if (success) + return true; + } + + *tex_y = gs_texture_create(width, height, GS_R8, 1, NULL, flags); + *tex_uv = gs_texture_create(width / 2, height / 2, GS_R8G8, 1, NULL, + flags); + + if (!*tex_y || !*tex_uv) { + if (*tex_y) + gs_texture_destroy(*tex_y); + if (*tex_uv) + gs_texture_destroy(*tex_uv); + *tex_y = NULL; + *tex_uv = NULL; + return false; + } + + return true; +} + +bool gs_texture_create_p010(gs_texture_t **tex_y, gs_texture_t **tex_uv, + uint32_t width, uint32_t height, uint32_t flags) +{ + graphics_t *graphics = thread_graphics; + bool success = false; + + if (!gs_valid("gs_texture_create_p010")) + return false; + + if ((width & 1) == 1 || (height & 
1) == 1) { + blog(LOG_ERROR, "P010 textures must have dimensions " + "divisible by 2."); + return false; + } + + if (graphics->exports.device_texture_create_p010) { + success = graphics->exports.device_texture_create_p010( + graphics->device, tex_y, tex_uv, width, height, flags); + if (success) + return true; + } + + *tex_y = gs_texture_create(width, height, GS_R16, 1, NULL, flags); + *tex_uv = gs_texture_create(width / 2, height / 2, GS_RG16, 1, NULL, + flags); + + if (!*tex_y || !*tex_uv) { + if (*tex_y) + gs_texture_destroy(*tex_y); + if (*tex_uv) + gs_texture_destroy(*tex_uv); + *tex_y = NULL; + *tex_uv = NULL; + return false; + } + + return true; +} + #ifdef __APPLE__ /** Platform specific functions */ @@ -3175,84 +3253,6 @@ int gs_texture_release_sync(gs_texture_t *tex, uint64_t key) return -1; } -bool gs_texture_create_nv12(gs_texture_t **tex_y, gs_texture_t **tex_uv, - uint32_t width, uint32_t height, uint32_t flags) -{ - graphics_t *graphics = thread_graphics; - bool success = false; - - if (!gs_valid("gs_texture_create_nv12")) - return false; - - if ((width & 1) == 1 || (height & 1) == 1) { - blog(LOG_ERROR, "NV12 textures must have dimensions " - "divisible by 2."); - return false; - } - - if (graphics->exports.device_texture_create_nv12) { - success = graphics->exports.device_texture_create_nv12( - graphics->device, tex_y, tex_uv, width, height, flags); - if (success) - return true; - } - - *tex_y = gs_texture_create(width, height, GS_R8, 1, NULL, flags); - *tex_uv = gs_texture_create(width / 2, height / 2, GS_R8G8, 1, NULL, - flags); - - if (!*tex_y || !*tex_uv) { - if (*tex_y) - gs_texture_destroy(*tex_y); - if (*tex_uv) - gs_texture_destroy(*tex_uv); - *tex_y = NULL; - *tex_uv = NULL; - return false; - } - - return true; -} - -bool gs_texture_create_p010(gs_texture_t **tex_y, gs_texture_t **tex_uv, - uint32_t width, uint32_t height, uint32_t flags) -{ - graphics_t *graphics = thread_graphics; - bool success = false; - - if 
(!gs_valid("gs_texture_create_p010")) - return false; - - if ((width & 1) == 1 || (height & 1) == 1) { - blog(LOG_ERROR, "P010 textures must have dimensions " - "divisible by 2."); - return false; - } - - if (graphics->exports.device_texture_create_p010) { - success = graphics->exports.device_texture_create_p010( - graphics->device, tex_y, tex_uv, width, height, flags); - if (success) - return true; - } - - *tex_y = gs_texture_create(width, height, GS_R16, 1, NULL, flags); - *tex_uv = gs_texture_create(width / 2, height / 2, GS_RG16, 1, NULL, - flags); - - if (!*tex_y || !*tex_uv) { - if (*tex_y) - gs_texture_destroy(*tex_y); - if (*tex_uv) - gs_texture_destroy(*tex_uv); - *tex_y = NULL; - *tex_uv = NULL; - return false; - } - - return true; -} - gs_stagesurf_t *gs_stagesurface_create_nv12(uint32_t width, uint32_t height) { graphics_t *graphics = thread_graphics; diff --git a/libobs/graphics/graphics.h b/libobs/graphics/graphics.h index 0f62d5b95..ee17a65fa 100644 --- a/libobs/graphics/graphics.h +++ b/libobs/graphics/graphics.h @@ -857,6 +857,12 @@ EXPORT bool gs_timer_range_get_data(gs_timer_range_t *range, bool *disjoint, EXPORT bool gs_nv12_available(void); EXPORT bool gs_p010_available(void); +EXPORT bool gs_texture_create_nv12(gs_texture_t **tex_y, gs_texture_t **tex_uv, + uint32_t width, uint32_t height, + uint32_t flags); +EXPORT bool gs_texture_create_p010(gs_texture_t **tex_y, gs_texture_t **tex_uv, + uint32_t width, uint32_t height, + uint32_t flags); EXPORT bool gs_is_monitor_hdr(void *monitor); @@ -955,13 +961,6 @@ EXPORT int gs_texture_acquire_sync(gs_texture_t *tex, uint64_t key, */ EXPORT int gs_texture_release_sync(gs_texture_t *tex, uint64_t key); -EXPORT bool gs_texture_create_nv12(gs_texture_t **tex_y, gs_texture_t **tex_uv, - uint32_t width, uint32_t height, - uint32_t flags); -EXPORT bool gs_texture_create_p010(gs_texture_t **tex_y, gs_texture_t **tex_uv, - uint32_t width, uint32_t height, - uint32_t flags); - EXPORT gs_stagesurf_t 
*gs_stagesurface_create_nv12(uint32_t width, uint32_t height); EXPORT gs_stagesurf_t *gs_stagesurface_create_p010(uint32_t width, diff --git a/libobs/obs-encoder.h b/libobs/obs-encoder.h index 6e831af5c..c6184bfb5 100644 --- a/libobs/obs-encoder.h +++ b/libobs/obs-encoder.h @@ -29,6 +29,9 @@ extern "C" { #endif +struct obs_encoder; +typedef struct obs_encoder obs_encoder_t; + #define OBS_ENCODER_CAP_DEPRECATED (1 << 0) #define OBS_ENCODER_CAP_PASS_TEXTURE (1 << 1) #define OBS_ENCODER_CAP_DYN_BITRATE (1 << 2) diff --git a/libobs/obs-internal.h b/libobs/obs-internal.h index 1ea555c3a..6e975d065 100644 --- a/libobs/obs-internal.h +++ b/libobs/obs-internal.h @@ -268,9 +268,9 @@ struct obs_core_video_mix { gs_stagesurf_t *active_copy_surfaces[NUM_TEXTURES][NUM_CHANNELS]; gs_stagesurf_t *copy_surfaces[NUM_TEXTURES][NUM_CHANNELS]; gs_texture_t *convert_textures[NUM_CHANNELS]; + gs_texture_t *convert_textures_encode[NUM_CHANNELS]; #ifdef _WIN32 gs_stagesurf_t *copy_surfaces_encode[NUM_TEXTURES]; - gs_texture_t *convert_textures_encode[NUM_CHANNELS]; #endif gs_texture_t *render_texture; gs_texture_t *output_texture; diff --git a/libobs/obs-video-gpu-encode.c b/libobs/obs-video-gpu-encode.c index 0dfb11df0..0d65a6d17 100644 --- a/libobs/obs-video-gpu-encode.c +++ b/libobs/obs-video-gpu-encode.c @@ -17,8 +17,11 @@ #include "obs-internal.h" -static void *gpu_encode_thread(struct obs_core_video_mix *video) +#define NBSP "\xC2\xA0" +static const char *gpu_encode_frame_name = "gpu_encode_frame"; +static void *gpu_encode_thread(void *data) { + struct obs_core_video_mix *video = data; uint64_t interval = video_output_get_frame_time(video->video); DARRAY(obs_encoder_t *) encoders; int wait_frames = NUM_ENCODE_TEXTURE_FRAMES_TO_WAIT; @@ -26,6 +29,10 @@ static void *gpu_encode_thread(struct obs_core_video_mix *video) da_init(encoders); os_set_thread_name("obs gpu encode thread"); + const char *gpu_encode_thread_name = profile_store_name( + obs_get_profiler_name_store(), + 
"obs_gpu_encode_thread(%g" NBSP "ms)", interval / 1000000.); + profile_register_root(gpu_encode_thread_name, interval); while (os_sem_wait(video->gpu_encode_semaphore) == 0) { struct obs_tex_frame tf; @@ -42,6 +49,8 @@ static void *gpu_encode_thread(struct obs_core_video_mix *video) continue; } + profile_start(gpu_encode_thread_name); + os_event_reset(video->gpu_encode_inactive); /* -------------- */ @@ -141,6 +150,9 @@ static void *gpu_encode_thread(struct obs_core_video_mix *video) obs_encoder_release(encoders.array[i]); da_resize(encoders, 0); + + profile_end(gpu_encode_thread_name); + profile_reenable_thread(); } da_free(encoders); @@ -149,7 +161,6 @@ static void *gpu_encode_thread(struct obs_core_video_mix *video) bool init_gpu_encoding(struct obs_core_video_mix *video) { -#ifdef _WIN32 const struct video_output_info *info = video_output_get_info(video->video); @@ -173,7 +184,11 @@ bool init_gpu_encoding(struct obs_core_video_mix *video) return false; } +#ifdef _WIN32 uint32_t handle = gs_texture_get_shared_handle(tex); +#else + uint32_t handle = (uint32_t)-1; +#endif struct obs_tex_frame frame = { .tex = tex, .tex_uv = tex_uv, .handle = handle}; @@ -195,10 +210,6 @@ bool init_gpu_encoding(struct obs_core_video_mix *video) video->gpu_encode_thread_initialized = true; return true; -#else - UNUSED_PARAMETER(video); - return false; -#endif } void stop_gpu_encoding_thread(struct obs_core_video_mix *video) diff --git a/libobs/obs-video.c b/libobs/obs-video.c index 60acaaf6f..8918b869d 100644 --- a/libobs/obs-video.c +++ b/libobs/obs-video.c @@ -427,7 +427,6 @@ stage_output_texture(struct obs_core_video_mix *video, int cur_texture, profile_end(stage_output_texture_name); } -#ifdef _WIN32 static inline bool queue_frame(struct obs_core_video_mix *video, bool raw_active, struct obs_vframe_info *vframe_info) @@ -455,7 +454,9 @@ static inline bool queue_frame(struct obs_core_video_mix *video, circlebuf_pop_front(&video->gpu_encoder_avail_queue, &tf, sizeof(tf)); if 
(tf.released) { +#ifdef _WIN32 gs_texture_acquire_sync(tf.tex, tf.lock_key, GS_WAIT_INFINITE); +#endif tf.released = false; } @@ -479,8 +480,10 @@ static inline bool queue_frame(struct obs_core_video_mix *video, tf.count = 1; tf.timestamp = vframe_info->timestamp; tf.released = true; +#ifdef _WIN32 tf.handle = gs_texture_get_shared_handle(tf.tex); gs_texture_release_sync(tf.tex, ++tf.lock_key); +#endif circlebuf_push_back(&video->gpu_encoder_queue, &tf, sizeof(tf)); os_sem_post(video->gpu_encode_semaphore); @@ -520,7 +523,6 @@ static void output_gpu_encoders(struct obs_core_video_mix *video, end: profile_end(output_gpu_encoders_name); } -#endif static inline void render_video(struct obs_core_video_mix *video, bool raw_active, const bool gpu_active, @@ -540,26 +542,24 @@ static inline void render_video(struct obs_core_video_mix *video, size_t channel_count = NUM_CHANNELS; gs_texture_t *output_texture = render_output_texture(video); -#ifdef _WIN32 if (gpu_active) { convert_textures = video->convert_textures_encode; +#ifdef _WIN32 copy_surfaces = video->copy_surfaces_encode; channel_count = 1; +#endif gs_flush(); } -#endif if (video->gpu_conversion) { render_convert_texture(video, convert_textures, output_texture); } -#ifdef _WIN32 if (gpu_active) { gs_flush(); output_gpu_encoders(video, raw_active); } -#endif if (raw_active) { stage_output_texture(video, cur_texture, @@ -963,12 +963,10 @@ static void clear_raw_frame_data(struct obs_core_video_mix *video) circlebuf_free(&video->vframe_info_buffer); } -#ifdef _WIN32 static void clear_gpu_frame_data(struct obs_core_video_mix *video) { circlebuf_free(&video->vframe_info_buffer_gpu); } -#endif extern THREAD_LOCAL bool is_graphics_thread; @@ -1076,30 +1074,22 @@ static const char *output_frame_name = "output_frame"; static inline void update_active_state(struct obs_core_video_mix *video) { const bool raw_was_active = video->raw_was_active; -#ifdef _WIN32 const bool gpu_was_active = video->gpu_was_active; -#endif const bool 
was_active = video->was_active; bool raw_active = os_atomic_load_long(&video->raw_active) > 0; -#ifdef _WIN32 const bool gpu_active = os_atomic_load_long(&video->gpu_encoder_active) > 0; const bool active = raw_active || gpu_active; -#else - const bool active = raw_active; -#endif if (!was_active && active) clear_base_frame_data(video); if (!raw_was_active && raw_active) clear_raw_frame_data(video); -#ifdef _WIN32 if (!gpu_was_active && gpu_active) clear_gpu_frame_data(video); video->gpu_was_active = gpu_active; -#endif video->raw_was_active = raw_active; video->was_active = active; } diff --git a/libobs/obs.c b/libobs/obs.c index 1850ecc75..1e57f2517 100644 --- a/libobs/obs.c +++ b/libobs/obs.c @@ -179,7 +179,6 @@ static bool obs_init_gpu_conversion(struct obs_core_video_mix *video) video->convert_textures[0] = NULL; video->convert_textures[1] = NULL; video->convert_textures[2] = NULL; -#ifdef _WIN32 video->convert_textures_encode[0] = NULL; video->convert_textures_encode[1] = NULL; video->convert_textures_encode[2] = NULL; @@ -200,7 +199,6 @@ static bool obs_init_gpu_conversion(struct obs_core_video_mix *video) return false; } } -#endif bool success = true; @@ -297,13 +295,11 @@ static bool obs_init_gpu_conversion(struct obs_core_video_mix *video) gs_texture_destroy(video->convert_textures[c]); video->convert_textures[c] = NULL; } -#ifdef _WIN32 if (video->convert_textures_encode[c]) { gs_texture_destroy( video->convert_textures_encode[c]); video->convert_textures_encode[c] = NULL; } -#endif } } @@ -817,12 +813,10 @@ static void obs_free_render_textures(struct obs_core_video_mix *video) gs_texture_destroy(video->convert_textures[c]); video->convert_textures[c] = NULL; } -#ifdef _WIN32 if (video->convert_textures_encode[c]) { gs_texture_destroy(video->convert_textures_encode[c]); video->convert_textures_encode[c] = NULL; } -#endif } gs_texture_destroy(video->output_texture); -- 2.40.0 From d9c1a0ce4ae3b7a2465b0f77dd1bc8e8ff74dd21 Mon Sep 17 00:00:00 2001 From: 
Torge Matthies Date: Sun, 11 Jul 2021 21:23:07 +0200 Subject: [PATCH 4/5] libobs: Add encode_texture2 function to struct obs_encoder_info And use it if non-NULL instead of encode_texture. --- libobs/obs-encoder.c | 21 ++++++++++++++++++- libobs/obs-encoder.h | 33 ++++++++++++++++++++++++++++++ libobs/obs-internal.h | 3 +++ libobs/obs-module.c | 38 +++++++++++++++++++++++++---------- libobs/obs-video-gpu-encode.c | 33 +++++++++++++++++++++++++----- 5 files changed, 111 insertions(+), 17 deletions(-) diff --git a/libobs/obs-encoder.c b/libobs/obs-encoder.c index 677b92f7d..83bf911c9 100644 --- a/libobs/obs-encoder.c +++ b/libobs/obs-encoder.c @@ -194,7 +194,7 @@ static void add_connection(struct obs_encoder *encoder) &audio_info, receive_audio, encoder); } else { struct video_scale_info info = {0}; - get_video_info(encoder, &info); + obs_encoder_get_video_info(encoder, &info); if (gpu_encode_available(encoder)) { start_gpu_encode(encoder); @@ -528,6 +528,25 @@ void obs_encoder_shutdown(obs_encoder_t *encoder) pthread_mutex_unlock(&encoder->init_mutex); } +void obs_encoder_get_video_info(struct obs_encoder *encoder, + struct video_scale_info *info) +{ + const struct video_output_info *voi; + voi = video_output_get_info(encoder->media); + + info->format = voi->format; + info->colorspace = voi->colorspace; + info->range = voi->range; + info->width = obs_encoder_get_width(encoder); + info->height = obs_encoder_get_height(encoder); + + if (encoder->info.get_video_info) + encoder->info.get_video_info(encoder->context.data, info); + + if (info->width != voi->width || info->height != voi->height) + obs_encoder_set_scaled_size(encoder, info->width, info->height); +} + static inline size_t get_callback_idx(const struct obs_encoder *encoder, void (*new_packet)(void *param, struct encoder_packet *packet), diff --git a/libobs/obs-encoder.h b/libobs/obs-encoder.h index c6184bfb5..f33e668bf 100644 --- a/libobs/obs-encoder.h +++ b/libobs/obs-encoder.h @@ -105,6 +105,18 @@ struct 
encoder_frame { int64_t pts; }; +struct gs_texture; + +/** Encoder input texture */ +struct encoder_texture { + /** Texture format and size */ + struct video_scale_info info; + /** Shared texture handle, only set on Windows */ + uint32_t handle; + /** Textures, NULL-terminated */ + struct gs_texture *tex[5]; +}; + /** * Encoder interface * @@ -265,6 +277,27 @@ struct obs_encoder_info { uint64_t lock_key, uint64_t *next_key, struct encoder_packet *packet, bool *received_packet); + + /** + * Returns whether texture encoding is available for this video format + * + * Has no effect if caps does not contain OBS_ENCODER_CAP_PASS_TEXTURE. + * If this function is not defined, it is assumed that only textures in + * NV12 format are supported. + * + * @param data Data associated with this encoder context + * @param[in] info Video format information + * @return Whether the encoder supports texture encoding + * with this video format + */ + bool (*encode_texture_available)(void *data, + const struct video_scale_info *info); + + bool (*encode_texture2)(void *data, struct encoder_texture *texture, + int64_t pts, uint64_t lock_key, + uint64_t *next_key, + struct encoder_packet *packet, + bool *received_packet); }; EXPORT void obs_register_encoder_s(const struct obs_encoder_info *info, diff --git a/libobs/obs-internal.h b/libobs/obs-internal.h index 6e975d065..2abfbddee 100644 --- a/libobs/obs-internal.h +++ b/libobs/obs-internal.h @@ -1259,6 +1259,9 @@ extern struct obs_encoder_info *find_encoder(const char *id); extern bool obs_encoder_initialize(obs_encoder_t *encoder); extern void obs_encoder_shutdown(obs_encoder_t *encoder); +extern void obs_encoder_get_video_info(struct obs_encoder *encoder, + struct video_scale_info *info); + extern void obs_encoder_start(obs_encoder_t *encoder, void (*new_packet)(void *param, struct encoder_packet *packet), diff --git a/libobs/obs-module.c b/libobs/obs-module.c index cd7841b6c..443c97b23 100644 --- a/libobs/obs-module.c +++ 
b/libobs/obs-module.c @@ -679,16 +679,30 @@ cleanup: da_push_back(dest, &data); \ } while (false) -#define CHECK_REQUIRED_VAL(type, info, val, func) \ - do { \ - if ((offsetof(type, val) + sizeof(info->val) > size) || \ - !info->val) { \ - blog(LOG_ERROR, \ - "Required value '" #val "' for " \ - "'%s' not found. " #func " failed.", \ - info->id); \ - goto error; \ - } \ +#define HAS_VAL(type, info, val) \ + ((offsetof(type, val) + sizeof(info->val) <= size) && info->val) + +#define CHECK_REQUIRED_VAL(type, info, val, func) \ + do { \ + if (!HAS_VAL(type, info, val)) { \ + blog(LOG_ERROR, \ + "Required value '" #val "' for " \ + "'%s' not found. " #func " failed.", \ + info->id); \ + goto error; \ + } \ + } while (false) + +#define CHECK_REQUIRED_VAL_EITHER(type, info, val1, val2, func) \ + do { \ + if (!HAS_VAL(type, info, val1) && \ + !HAS_VAL(type, info, val2)) { \ + blog(LOG_ERROR, \ + "Neither '" #val1 "' nor '" #val2 "' " \ + "for '%s' found. " #func " failed.", \ + info->id); \ + goto error; \ + } \ } while (false) #define HANDLE_ERROR(size_var, structure, info) \ @@ -899,7 +913,9 @@ void obs_register_encoder_s(const struct obs_encoder_info *info, size_t size) CHECK_REQUIRED_VAL_(info, destroy, obs_register_encoder); if ((info->caps & OBS_ENCODER_CAP_PASS_TEXTURE) != 0) - CHECK_REQUIRED_VAL_(info, encode_texture, obs_register_encoder); + CHECK_REQUIRED_VAL_EITHER(struct obs_encoder_info, info, + encode_texture, encode_texture2, + obs_register_encoder); else CHECK_REQUIRED_VAL_(info, encode, obs_register_encoder); diff --git a/libobs/obs-video-gpu-encode.c b/libobs/obs-video-gpu-encode.c index 0d65a6d17..394ded9b6 100644 --- a/libobs/obs-video-gpu-encode.c +++ b/libobs/obs-video-gpu-encode.c @@ -78,7 +78,7 @@ static void *gpu_encode_thread(void *data) for (size_t i = 0; i < encoders.num; i++) { struct encoder_packet pkt = {0}; bool received = false; - bool success; + bool success = false; obs_encoder_t *encoder = encoders.array[i]; struct obs_encoder *pair = 
encoder->paired_encoder; @@ -111,10 +111,33 @@ static void *gpu_encode_thread(void *data) else next_key++; - success = encoder->info.encode_texture( - encoder->context.data, tf.handle, - encoder->cur_pts, lock_key, &next_key, &pkt, - &received); + if (encoder->info.encode_texture2) { + union { + struct encoder_texture tex; + /* MSVC complains about + offsetof(..., tex[3]) */ + char dummy[offsetof(struct encoder_texture, + tex) + + sizeof(struct gs_texture *) * + 3]; + } u = {0}; + + obs_encoder_get_video_info(encoder, + &u.tex.info); + u.tex.handle = tf.handle; + u.tex.tex[0] = tf.tex; + u.tex.tex[1] = tf.tex_uv; + u.tex.tex[2] = NULL; + success = encoder->info.encode_texture2( + encoder->context.data, &u.tex, + encoder->cur_pts, lock_key, &next_key, + &pkt, &received); + } else { + success = encoder->info.encode_texture( + encoder->context.data, tf.handle, + encoder->cur_pts, lock_key, &next_key, + &pkt, &received); + } send_off_encoder_packet(encoder, success, received, &pkt); -- 2.40.0 From 0475795cdbf7c7bcb90bb373a10bbd5a7ce17a07 Mon Sep 17 00:00:00 2001 From: David Rosca Date: Thu, 30 Mar 2023 16:48:02 +0200 Subject: [PATCH 5/5] obs-ffmpeg: Implement Linux AMF texture encoding v2: don't require vk_enum_string_helper.h v3: only use one set of GL interop textures v4: wait on Vulkan copy fence v5: wait on GL copy sem + Vulkan external queue transfer v6: use optimal tiling v7: fix some validation errors v8: init AMF context with our Vulkan device --- plugins/obs-ffmpeg/CMakeLists.txt | 3 +- plugins/obs-ffmpeg/cmake/legacy.cmake | 3 +- plugins/obs-ffmpeg/texture-amf.cpp | 939 +++++++++++++++++++++++++- 3 files changed, 913 insertions(+), 32 deletions(-) diff --git a/plugins/obs-ffmpeg/CMakeLists.txt b/plugins/obs-ffmpeg/CMakeLists.txt index 778d93ffb..97376182f 100644 --- a/plugins/obs-ffmpeg/CMakeLists.txt +++ b/plugins/obs-ffmpeg/CMakeLists.txt @@ -112,9 +112,10 @@ elseif(OS_LINUX OR OS_FREEBSD) find_package(Libva REQUIRED) find_package(Libpci REQUIRED) + 
find_package(Vulkan REQUIRED) target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h texture-amf.cpp) - target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm Libpci::pci) + target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm Libpci::pci Vulkan::Vulkan) endif() set_target_properties_obs(obs-ffmpeg PROPERTIES FOLDER plugins/obs-ffmpeg PREFIX "") diff --git a/plugins/obs-ffmpeg/cmake/legacy.cmake b/plugins/obs-ffmpeg/cmake/legacy.cmake index 78b8c30a1..b29eef673 100644 --- a/plugins/obs-ffmpeg/cmake/legacy.cmake +++ b/plugins/obs-ffmpeg/cmake/legacy.cmake @@ -109,8 +109,9 @@ elseif(OS_POSIX AND NOT OS_MACOS) add_subdirectory(obs-amf-test) find_package(Libva REQUIRED) find_package(Libpci REQUIRED) + find_package(Vulkan REQUIRED) target_sources(obs-ffmpeg PRIVATE obs-ffmpeg-vaapi.c vaapi-utils.c vaapi-utils.h texture-amf.cpp) - target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm LIBPCI::LIBPCI) + target_link_libraries(obs-ffmpeg PRIVATE Libva::va Libva::drm LIBPCI::LIBPCI Vulkan::Vulkan) endif() setup_plugin_target(obs-ffmpeg) diff --git a/plugins/obs-ffmpeg/texture-amf.cpp b/plugins/obs-ffmpeg/texture-amf.cpp index fe651f0e1..0f5ee7b91 100644 --- a/plugins/obs-ffmpeg/texture-amf.cpp +++ b/plugins/obs-ffmpeg/texture-amf.cpp @@ -29,6 +29,15 @@ #include #endif +#ifdef __linux +#include +#include +#include +#include +#include +#include +#endif + #include #include #include @@ -57,14 +66,89 @@ struct amf_error { } }; +#define VK_CHECK(f) \ + { \ + VkResult res = (f); \ + if (res != VK_SUCCESS) { \ + blog(LOG_ERROR, "Vulkan error: " __FILE__ ":%d", \ + __LINE__); \ + throw "Vulkan error"; \ + } \ + } + +static VkFormat to_vk_format(AMF_SURFACE_FORMAT fmt) +{ + switch (fmt) { + case AMF_SURFACE_NV12: + return VK_FORMAT_G8_B8R8_2PLANE_420_UNORM; + case AMF_SURFACE_P010: + return VK_FORMAT_G16_B16R16_2PLANE_420_UNORM; + default: + throw "Unsupported AMF_SURFACE_FORMAT"; + } +} + +static VkFormat to_vk_format(enum 
gs_color_format fmt) +{ + switch (fmt) { + case GS_R8: + return VK_FORMAT_R8_UNORM; + case GS_R16: + return VK_FORMAT_R16_UNORM; + case GS_R8G8: + return VK_FORMAT_R8G8_UNORM; + case GS_RG16: + return VK_FORMAT_R16G16_UNORM; + default: + throw "Unsupported gs_color_format"; + } +} + +static GLenum to_gl_format(enum gs_color_format fmt) +{ + switch (fmt) { + case GS_R8: + return GL_R8; + case GS_R16: + return GL_R16; + case GS_R8G8: + return GL_RG8; + case GS_RG16: + return GL_RG16; + default: + throw "Unsupported gs_color_format"; + } +} + struct handle_tex { uint32_t handle; #ifdef _WIN32 ComPtr tex; ComPtr km; +#else + AMFVulkanSurface *surfaceVk = nullptr; #endif }; +#ifdef __linux +struct gl_tex { + GLuint glsem = 0; + VkSemaphore sem = VK_NULL_HANDLE; + GLuint glCopySem = 0; + VkSemaphore copySem = VK_NULL_HANDLE; + VkFence copyFence = VK_NULL_HANDLE; + struct { + uint32_t width = 0; + uint32_t height = 0; + VkImage image = VK_NULL_HANDLE; + VkDeviceMemory memory = VK_NULL_HANDLE; + GLuint glmem = 0; + GLuint gltex = 0; + GLuint fbo = 0; + } planes[2]; +}; +#endif + struct adapter_caps { bool is_amd = false; bool supports_avc = false; @@ -97,6 +181,7 @@ struct amf_base { bool fallback; AMFContextPtr amf_context; + AMFContext1Ptr amf_context1; AMFComponentPtr amf_encoder; AMFBufferPtr packet_data; AMFRate amf_frame_rate; @@ -130,6 +215,9 @@ using buf_t = std::vector; #ifdef _WIN32 using d3dtex_t = ComPtr; +#else +using d3dtex_t = handle_tex; +#endif struct amf_texencode : amf_base, public AMFSurfaceObserver { volatile bool destroying = false; @@ -140,11 +228,92 @@ struct amf_texencode : amf_base, public AMFSurfaceObserver { std::vector available_textures; std::unordered_map active_textures; +#ifdef _WIN32 ComPtr device; ComPtr context; +#else + std::unique_ptr vk; + VkQueue queue = VK_NULL_HANDLE; + VkCommandPool cmdpool = VK_NULL_HANDLE; + VkCommandBuffer cmdbuf = VK_NULL_HANDLE; + struct gl_tex gltex = {}; + std::unordered_map read_fbos; + + 
PFN_vkGetMemoryFdKHR vkGetMemoryFdKHR; + PFN_vkGetSemaphoreFdKHR vkGetSemaphoreFdKHR; + PFNGLGETERRORPROC glGetError; + PFNGLCREATEMEMORYOBJECTSEXTPROC glCreateMemoryObjectsEXT; + PFNGLDELETEMEMORYOBJECTSEXTPROC glDeleteMemoryObjectsEXT; + PFNGLIMPORTMEMORYFDEXTPROC glImportMemoryFdEXT; + PFNGLISMEMORYOBJECTEXTPROC glIsMemoryObjectEXT; + PFNGLMEMORYOBJECTPARAMETERIVEXTPROC glMemoryObjectParameterivEXT; + PFNGLGENTEXTURESPROC glGenTextures; + PFNGLDELETETEXTURESPROC glDeleteTextures; + PFNGLBINDTEXTUREPROC glBindTexture; + PFNGLTEXPARAMETERIPROC glTexParameteri; + PFNGLTEXSTORAGEMEM2DEXTPROC glTexStorageMem2DEXT; + PFNGLGENSEMAPHORESEXTPROC glGenSemaphoresEXT; + PFNGLDELETESEMAPHORESEXTPROC glDeleteSemaphoresEXT; + PFNGLIMPORTSEMAPHOREFDEXTPROC glImportSemaphoreFdEXT; + PFNGLISSEMAPHOREEXTPROC glIsSemaphoreEXT; + PFNGLWAITSEMAPHOREEXTPROC glWaitSemaphoreEXT; + PFNGLSIGNALSEMAPHOREEXTPROC glSignalSemaphoreEXT; + PFNGLGENFRAMEBUFFERSPROC glGenFramebuffers; + PFNGLDELETEFRAMEBUFFERSPROC glDeleteFramebuffers; + PFNGLBINDFRAMEBUFFERPROC glBindFramebuffer; + PFNGLFRAMEBUFFERTEXTURE2DPROC glFramebufferTexture2D; + PFNGLBLITFRAMEBUFFERPROC glBlitFramebuffer; +#endif inline amf_texencode() : amf_base(false) {} - ~amf_texencode() { os_atomic_set_bool(&destroying, true); } + ~amf_texencode() + { + os_atomic_set_bool(&destroying, true); +#ifdef __linux + if (!vk) + return; + + vkDeviceWaitIdle(vk->hDevice); + vkFreeCommandBuffers(vk->hDevice, cmdpool, 1, &cmdbuf); + vkDestroyCommandPool(vk->hDevice, cmdpool, nullptr); + + for (auto t : input_textures) { + vkFreeMemory(vk->hDevice, t.surfaceVk->hMemory, + nullptr); + vkDestroyImage(vk->hDevice, t.surfaceVk->hImage, + nullptr); + delete t.surfaceVk; + } + + obs_enter_graphics(); + + for (int i = 0; i < 2; ++i) { + auto p = gltex.planes[i]; + vkFreeMemory(vk->hDevice, p.memory, nullptr); + vkDestroyImage(vk->hDevice, p.image, nullptr); + this->glDeleteMemoryObjectsEXT(1, &p.glmem); + this->glDeleteTextures(1, &p.gltex); + 
this->glDeleteFramebuffers(1, &p.fbo); + } + vkDestroySemaphore(vk->hDevice, gltex.sem, nullptr); + vkDestroySemaphore(vk->hDevice, gltex.copySem, nullptr); + vkDestroyFence(vk->hDevice, gltex.copyFence, nullptr); + this->glDeleteSemaphoresEXT(1, &gltex.glsem); + this->glDeleteSemaphoresEXT(1, &gltex.glCopySem); + + for (auto f : read_fbos) + this->glDeleteFramebuffers(1, &f.second); + + obs_leave_graphics(); + + amf_encoder->Terminate(); + amf_context1->Terminate(); + amf_context->Terminate(); + + vkDestroyDevice(vk->hDevice, nullptr); + vkDestroyInstance(vk->hInstance, nullptr); +#endif + } void AMF_STD_CALL OnSurfaceDataRelease(amf::AMFSurface *surf) override { @@ -162,12 +331,189 @@ struct amf_texencode : amf_base, public AMFSurfaceObserver { void init() override { +#if defined(_WIN32) AMF_RESULT res = amf_context->InitDX11(device, AMF_DX11_1); if (res != AMF_OK) throw amf_error("InitDX11 failed", res); +#elif defined(__linux__) + vk = std::make_unique(); + vk->cbSizeof = sizeof(AMFVulkanDevice); + + std::vector instance_extensions = { + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, + VK_KHR_SURFACE_EXTENSION_NAME, + }; + + std::vector device_extensions = { + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, + VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME, + VK_KHR_SAMPLER_YCBCR_CONVERSION_EXTENSION_NAME, + }; + + amf_size count = 0; + amf_context1->GetVulkanDeviceExtensions(&count, nullptr); + device_extensions.resize(device_extensions.size() + count); + amf_context1->GetVulkanDeviceExtensions( + &count, + &device_extensions[device_extensions.size() - count]); + + VkApplicationInfo appInfo = {}; + appInfo.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO; + appInfo.pApplicationName = "OBS"; + appInfo.apiVersion = VK_API_VERSION_1_2; + + VkInstanceCreateInfo instanceInfo = {}; + instanceInfo.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO; + instanceInfo.pApplicationInfo = &appInfo; + instanceInfo.enabledExtensionCount = 
instance_extensions.size(); + instanceInfo.ppEnabledExtensionNames = + instance_extensions.data(); + VK_CHECK(vkCreateInstance(&instanceInfo, nullptr, + &vk->hInstance)); + + uint32_t deviceCount = 0; + VK_CHECK(vkEnumeratePhysicalDevices(vk->hInstance, &deviceCount, + nullptr)); + std::vector physicalDevices(deviceCount); + VK_CHECK(vkEnumeratePhysicalDevices(vk->hInstance, &deviceCount, + physicalDevices.data())); + for (VkPhysicalDevice dev : physicalDevices) { + VkPhysicalDeviceDriverProperties driverProps = {}; + driverProps.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + + VkPhysicalDeviceProperties2 props = {}; + props.sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + props.pNext = &driverProps; + vkGetPhysicalDeviceProperties2(dev, &props); + if (driverProps.driverID == + VK_DRIVER_ID_AMD_PROPRIETARY) { + vk->hPhysicalDevice = dev; + break; + } + } + if (!vk->hPhysicalDevice) { + throw "Failed to find Vulkan device VK_DRIVER_ID_AMD_PROPRIETARY"; + } + + uint32_t deviceExtensionCount = 0; + VK_CHECK(vkEnumerateDeviceExtensionProperties( + vk->hPhysicalDevice, nullptr, &deviceExtensionCount, + nullptr)); + std::vector deviceExts( + deviceExtensionCount); + VK_CHECK(vkEnumerateDeviceExtensionProperties( + vk->hPhysicalDevice, nullptr, &deviceExtensionCount, + deviceExts.data())); + std::vector deviceExtensions; + for (const char *name : device_extensions) { + auto it = std::find_if( + deviceExts.begin(), deviceExts.end(), + [name](VkExtensionProperties e) { + return strcmp(e.extensionName, name) == + 0; + }); + if (it != deviceExts.end()) { + deviceExtensions.push_back(name); + } + } + + float queuePriority = 1.0; + std::vector queueInfos; + uint32_t queueFamilyCount; + vkGetPhysicalDeviceQueueFamilyProperties( + vk->hPhysicalDevice, &queueFamilyCount, nullptr); + std::vector queueFamilyProperties( + queueFamilyCount); + vkGetPhysicalDeviceQueueFamilyProperties( + vk->hPhysicalDevice, &queueFamilyCount, + 
queueFamilyProperties.data()); + for (uint32_t i = 0; i < queueFamilyProperties.size(); ++i) { + VkDeviceQueueCreateInfo queueInfo = {}; + queueInfo.sType = + VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO; + queueInfo.queueFamilyIndex = i; + queueInfo.queueCount = 1; + queueInfo.pQueuePriorities = &queuePriority; + queueInfos.push_back(queueInfo); + } + + VkDeviceCreateInfo deviceInfo = {}; + deviceInfo.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO; + deviceInfo.queueCreateInfoCount = queueInfos.size(); + deviceInfo.pQueueCreateInfos = queueInfos.data(); + deviceInfo.enabledExtensionCount = deviceExtensions.size(); + deviceInfo.ppEnabledExtensionNames = deviceExtensions.data(); + VK_CHECK(vkCreateDevice(vk->hPhysicalDevice, &deviceInfo, + nullptr, &vk->hDevice)); + + AMF_RESULT res = amf_context1->InitVulkan(vk.get()); + if (res != AMF_OK) + throw amf_error("InitVulkan failed", res); + + vkGetDeviceQueue(vk->hDevice, 0, 0, &queue); + + VkCommandPoolCreateInfo cmdPoolInfo = {}; + cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO; + cmdPoolInfo.queueFamilyIndex = 0; + cmdPoolInfo.flags = + VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT; + VK_CHECK(vkCreateCommandPool(vk->hDevice, &cmdPoolInfo, nullptr, + &cmdpool)); + + VkCommandBufferAllocateInfo commandBufferInfo = {}; + commandBufferInfo.sType = + VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + commandBufferInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + commandBufferInfo.commandPool = cmdpool; + commandBufferInfo.commandBufferCount = 1; + VK_CHECK(vkAllocateCommandBuffers(vk->hDevice, + &commandBufferInfo, &cmdbuf)); + +#define GET_PROC_VK(x) \ + x = reinterpret_cast( \ + vkGetDeviceProcAddr(vk->hDevice, #x)); \ + if (!x) \ + throw "Failed to resolve " #x; + +#define GET_PROC_GL(x) \ + x = reinterpret_cast(eglGetProcAddress(#x)); \ + if (!x) \ + throw "Failed to resolve " #x; + + GET_PROC_VK(vkGetMemoryFdKHR); + GET_PROC_VK(vkGetSemaphoreFdKHR); + GET_PROC_GL(glGetError); + 
GET_PROC_GL(glCreateMemoryObjectsEXT); + GET_PROC_GL(glDeleteMemoryObjectsEXT); + GET_PROC_GL(glImportMemoryFdEXT); + GET_PROC_GL(glIsMemoryObjectEXT); + GET_PROC_GL(glMemoryObjectParameterivEXT); + GET_PROC_GL(glGenTextures); + GET_PROC_GL(glDeleteTextures); + GET_PROC_GL(glBindTexture); + GET_PROC_GL(glTexParameteri); + GET_PROC_GL(glTexStorageMem2DEXT); + GET_PROC_GL(glGenSemaphoresEXT); + GET_PROC_GL(glDeleteSemaphoresEXT); + GET_PROC_GL(glImportSemaphoreFdEXT); + GET_PROC_GL(glIsSemaphoreEXT); + GET_PROC_GL(glWaitSemaphoreEXT); + GET_PROC_GL(glSignalSemaphoreEXT); + GET_PROC_GL(glGenFramebuffers); + GET_PROC_GL(glDeleteFramebuffers); + GET_PROC_GL(glBindFramebuffer); + GET_PROC_GL(glFramebufferTexture2D); + GET_PROC_GL(glBlitFramebuffer); + +#undef GET_PROC_VK +#undef GET_PROC_GL + +#endif } }; -#endif struct amf_fallback : amf_base, public AMFSurfaceObserver { volatile bool destroying = false; @@ -200,13 +546,7 @@ struct amf_fallback : amf_base, public AMFSurfaceObserver { if (res != AMF_OK) throw amf_error("InitDX11 failed", res); #elif defined(__linux__) - AMFContext1 *context1 = NULL; - AMF_RESULT res = amf_context->QueryInterface( - AMFContext1::IID(), (void **)&context1); - if (res != AMF_OK) - throw amf_error("CreateContext1 failed", res); - res = context1->InitVulkan(nullptr); - context1->Release(); + AMF_RESULT res = amf_context1->InitVulkan(nullptr); if (res != AMF_OK) throw amf_error("InitVulkan failed", res); #endif @@ -420,6 +760,361 @@ static void get_tex_from_handle(amf_texencode *enc, uint32_t handle, *km_out = km.Detach(); *tex_out = tex.Detach(); } +#else +static uint32_t memoryTypeIndex(amf_texencode *enc, + VkMemoryPropertyFlags properties, + uint32_t typeBits) +{ + VkPhysicalDeviceMemoryProperties prop; + vkGetPhysicalDeviceMemoryProperties(enc->vk->hPhysicalDevice, &prop); + for (uint32_t i = 0; i < prop.memoryTypeCount; i++) { + if ((prop.memoryTypes[i].propertyFlags & properties) == + properties && + typeBits & (1 << i)) { + return i; + 
} + } + return 0xFFFFFFFF; +} + +static void cmd_buf_begin(amf_texencode *enc) +{ + VkCommandBufferBeginInfo commandBufferBegin = {}; + commandBufferBegin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + VK_CHECK(vkBeginCommandBuffer(enc->cmdbuf, &commandBufferBegin)); +} + +static void cmd_buf_submit(amf_texencode *enc, VkSemaphore *semaphore = nullptr, + VkFence *fence = nullptr) +{ + VK_CHECK(vkEndCommandBuffer(enc->cmdbuf)); + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &enc->cmdbuf; + submitInfo.signalSemaphoreCount = semaphore ? 1 : 0; + submitInfo.pSignalSemaphores = semaphore; + if (fence) { + VK_CHECK(vkQueueSubmit(enc->queue, 1, &submitInfo, *fence)); + return; + } + VkFenceCreateInfo fenceInfo = {}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VkFence f; + VK_CHECK(vkCreateFence(enc->vk->hDevice, &fenceInfo, nullptr, &f)); + VK_CHECK(vkQueueSubmit(enc->queue, 1, &submitInfo, f)); + VK_CHECK(vkWaitForFences(enc->vk->hDevice, 1, &f, VK_TRUE, UINT64_MAX)); + vkDestroyFence(enc->vk->hDevice, f, nullptr); +} + +static void add_output_tex(amf_texencode *enc, handle_tex &output_tex, + encoder_texture *from) +{ + output_tex.surfaceVk = new AMFVulkanSurface; + output_tex.surfaceVk->cbSizeof = sizeof(AMFVulkanSurface); + output_tex.surfaceVk->pNext = nullptr; + + VkImageCreateInfo imageInfo = {}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.format = to_vk_format(enc->amf_format); + imageInfo.extent.width = from->info.width; + imageInfo.extent.height = from->info.height; + imageInfo.extent.depth = 1; + imageInfo.arrayLayers = 1; + imageInfo.mipLevels = 1; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageInfo.tiling = VK_IMAGE_TILING_LINEAR; + imageInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | + 
VK_IMAGE_USAGE_TRANSFER_DST_BIT; + imageInfo.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT; + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + VK_CHECK(vkCreateImage(enc->vk->hDevice, &imageInfo, nullptr, + &output_tex.surfaceVk->hImage)); + + VkMemoryRequirements memoryReqs; + vkGetImageMemoryRequirements(enc->vk->hDevice, + output_tex.surfaceVk->hImage, &memoryReqs); + VkMemoryAllocateInfo memoryAllocInfo = {}; + memoryAllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memoryAllocInfo.allocationSize = memoryReqs.size; + memoryAllocInfo.memoryTypeIndex = + memoryTypeIndex(enc, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + memoryReqs.memoryTypeBits); + VK_CHECK(vkAllocateMemory(enc->vk->hDevice, &memoryAllocInfo, nullptr, + &output_tex.surfaceVk->hMemory)); + VK_CHECK(vkBindImageMemory(enc->vk->hDevice, + output_tex.surfaceVk->hImage, + output_tex.surfaceVk->hMemory, 0)); + + cmd_buf_begin(enc); + VkImageMemoryBarrier imageBarrier = {}; + imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageBarrier.image = output_tex.surfaceVk->hImage; + imageBarrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageBarrier.subresourceRange.layerCount = 1; + imageBarrier.subresourceRange.levelCount = 1; + imageBarrier.srcAccessMask = 0; + imageBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | + VK_ACCESS_MEMORY_WRITE_BIT; + vkCmdPipelineBarrier(enc->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 1, &imageBarrier); + cmd_buf_submit(enc); + + output_tex.surfaceVk->iSize = memoryAllocInfo.allocationSize; + output_tex.surfaceVk->eFormat = imageInfo.format; + output_tex.surfaceVk->iWidth = imageInfo.extent.width; + output_tex.surfaceVk->iHeight = imageInfo.extent.height; + output_tex.surfaceVk->eCurrentLayout = imageInfo.initialLayout; + output_tex.surfaceVk->eUsage = 
AMF_SURFACE_USAGE_DEFAULT; + output_tex.surfaceVk->eAccess = AMF_MEMORY_CPU_LOCAL; + output_tex.surfaceVk->Sync.cbSizeof = sizeof(AMFVulkanSync); + output_tex.surfaceVk->Sync.pNext = nullptr; + output_tex.surfaceVk->Sync.hSemaphore = nullptr; + output_tex.surfaceVk->Sync.bSubmitted = true; + output_tex.surfaceVk->Sync.hFence = nullptr; + + enc->input_textures.push_back(output_tex); +} + +static inline void create_gl_tex(amf_texencode *enc, gl_tex &output_tex, + encoder_texture *from) +{ + if (output_tex.glsem) + return; + + cmd_buf_begin(enc); + for (int i = 0; i < 2; ++i) { + obs_enter_graphics(); + auto gs_format = gs_texture_get_color_format(from->tex[i]); + output_tex.planes[i].width = gs_texture_get_width(from->tex[i]); + output_tex.planes[i].height = + gs_texture_get_height(from->tex[i]); + obs_leave_graphics(); + + VkExternalMemoryImageCreateInfo extImageInfo = {}; + extImageInfo.sType = + VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO; + extImageInfo.handleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + + VkImageCreateInfo imageInfo = {}; + imageInfo.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO; + imageInfo.pNext = &extImageInfo; + imageInfo.imageType = VK_IMAGE_TYPE_2D; + imageInfo.format = to_vk_format(gs_format); + imageInfo.extent.width = output_tex.planes[i].width; + imageInfo.extent.height = output_tex.planes[i].height; + imageInfo.extent.depth = 1; + imageInfo.arrayLayers = 1; + imageInfo.mipLevels = 1; + imageInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageInfo.samples = VK_SAMPLE_COUNT_1_BIT; + imageInfo.tiling = VK_IMAGE_TILING_OPTIMAL; + imageInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT; + imageInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE; + VK_CHECK(vkCreateImage(enc->vk->hDevice, &imageInfo, nullptr, + &output_tex.planes[i].image)); + + VkMemoryRequirements memoryReqs; + vkGetImageMemoryRequirements(enc->vk->hDevice, + output_tex.planes[i].image, + &memoryReqs); + + 
VkExportMemoryAllocateInfo expMemoryAllocInfo = {}; + expMemoryAllocInfo.sType = + VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO; + expMemoryAllocInfo.handleTypes = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + + VkMemoryDedicatedAllocateInfo dedMemoryAllocInfo = {}; + dedMemoryAllocInfo.sType = + VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO; + dedMemoryAllocInfo.image = output_tex.planes[i].image; + dedMemoryAllocInfo.pNext = &expMemoryAllocInfo; + + VkMemoryAllocateInfo memoryAllocInfo = {}; + memoryAllocInfo.pNext = &dedMemoryAllocInfo; + memoryAllocInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO; + memoryAllocInfo.allocationSize = memoryReqs.size; + memoryAllocInfo.memoryTypeIndex = memoryTypeIndex( + enc, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + memoryReqs.memoryTypeBits); + VK_CHECK(vkAllocateMemory(enc->vk->hDevice, &memoryAllocInfo, + nullptr, + &output_tex.planes[i].memory)); + VK_CHECK(vkBindImageMemory(enc->vk->hDevice, + output_tex.planes[i].image, + output_tex.planes[i].memory, 0)); + + VkImageMemoryBarrier imageBarrier = {}; + imageBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + imageBarrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + imageBarrier.image = output_tex.planes[i].image; + imageBarrier.subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT; + imageBarrier.subresourceRange.layerCount = 1; + imageBarrier.subresourceRange.levelCount = 1; + imageBarrier.srcAccessMask = 0; + imageBarrier.dstAccessMask = 0; + vkCmdPipelineBarrier(enc->cmdbuf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, &imageBarrier); + + imageBarrier.oldLayout = imageBarrier.newLayout; + imageBarrier.srcQueueFamilyIndex = 0; + imageBarrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL; + vkCmdPipelineBarrier(enc->cmdbuf, + VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, + nullptr, 0, nullptr, 1, 
&imageBarrier); + + // Import memory + VkMemoryGetFdInfoKHR memFdInfo = {}; + memFdInfo.sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR; + memFdInfo.memory = output_tex.planes[i].memory; + memFdInfo.handleType = + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; + int fd = -1; + VK_CHECK(enc->vkGetMemoryFdKHR(enc->vk->hDevice, &memFdInfo, + &fd)); + + obs_enter_graphics(); + + enc->glCreateMemoryObjectsEXT(1, &output_tex.planes[i].glmem); + GLint dedicated = GL_TRUE; + enc->glMemoryObjectParameterivEXT( + output_tex.planes[i].glmem, + GL_DEDICATED_MEMORY_OBJECT_EXT, &dedicated); + enc->glImportMemoryFdEXT(output_tex.planes[i].glmem, + memoryAllocInfo.allocationSize, + GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd); + + enc->glGenTextures(1, &output_tex.planes[i].gltex); + enc->glBindTexture(GL_TEXTURE_2D, output_tex.planes[i].gltex); + enc->glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, + GL_OPTIMAL_TILING_EXT); + enc->glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, + to_gl_format(gs_format), + imageInfo.extent.width, + imageInfo.extent.height, + output_tex.planes[i].glmem, 0); + + enc->glGenFramebuffers(1, &output_tex.planes[i].fbo); + enc->glBindFramebuffer(GL_FRAMEBUFFER, + output_tex.planes[i].fbo); + enc->glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + output_tex.planes[i].gltex, 0); + enc->glBindFramebuffer(GL_FRAMEBUFFER, 0); + + bool import_ok = + enc->glIsMemoryObjectEXT(output_tex.planes[i].glmem) && + enc->glGetError() == GL_NO_ERROR; + + obs_leave_graphics(); + + if (!import_ok) + throw "OpenGL texture import failed"; + } + + VkExportSemaphoreCreateInfo expSemInfo = {}; + expSemInfo.sType = VK_STRUCTURE_TYPE_EXPORT_SEMAPHORE_CREATE_INFO; + expSemInfo.handleTypes = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT; + + VkSemaphoreCreateInfo semInfo = {}; + semInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO; + semInfo.pNext = &expSemInfo; + VK_CHECK(vkCreateSemaphore(enc->vk->hDevice, &semInfo, nullptr, + &output_tex.sem)); + + 
VK_CHECK(vkCreateSemaphore(enc->vk->hDevice, &semInfo, nullptr, + &output_tex.copySem)); + + VkFenceCreateInfo fenceInfo = {}; + fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; + VK_CHECK(vkCreateFence(enc->vk->hDevice, &fenceInfo, nullptr, + &output_tex.copyFence)); + + cmd_buf_submit(enc, &output_tex.copySem, &output_tex.copyFence); + + // Import semaphores + VkSemaphoreGetFdInfoKHR semFdInfo = {}; + semFdInfo.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR; + semFdInfo.semaphore = output_tex.sem; + semFdInfo.handleType = + VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR; + int fd = -1; + VK_CHECK(enc->vkGetSemaphoreFdKHR(enc->vk->hDevice, &semFdInfo, &fd)); + + semFdInfo.semaphore = output_tex.copySem; + int fdCopy = -1; + VK_CHECK(enc->vkGetSemaphoreFdKHR(enc->vk->hDevice, &semFdInfo, + &fdCopy)); + + obs_enter_graphics(); + + enc->glGenSemaphoresEXT(1, &output_tex.glsem); + enc->glGenSemaphoresEXT(1, &output_tex.glCopySem); + enc->glImportSemaphoreFdEXT(output_tex.glsem, + GL_HANDLE_TYPE_OPAQUE_FD_EXT, fd); + enc->glImportSemaphoreFdEXT(output_tex.glCopySem, + GL_HANDLE_TYPE_OPAQUE_FD_EXT, fdCopy); + + bool import_ok = enc->glIsSemaphoreEXT(output_tex.glsem) && + enc->glIsSemaphoreEXT(output_tex.glCopySem) && + enc->glGetError() == GL_NO_ERROR; + + obs_leave_graphics(); + + if (!import_ok) + throw "OpenGL semaphore import failed"; +} + +static inline bool get_available_tex(amf_texencode *enc, handle_tex &output_tex) +{ + std::scoped_lock lock(enc->textures_mutex); + if (enc->available_textures.size()) { + output_tex = enc->available_textures.back(); + enc->available_textures.pop_back(); + return true; + } + + return false; +} + +static inline void get_output_tex(amf_texencode *enc, handle_tex &output_tex, + encoder_texture *from) +{ + if (!get_available_tex(enc, output_tex)) + add_output_tex(enc, output_tex, from); + + create_gl_tex(enc, enc->gltex, from); +} + +static inline GLuint get_read_fbo(amf_texencode *enc, gs_texture *tex) +{ + auto it 
= enc->read_fbos.find(tex); + if (it != enc->read_fbos.end()) { + return it->second; + } + GLuint *tex_obj = static_cast(gs_texture_get_obj(tex)); + GLuint fbo; + enc->glGenFramebuffers(1, &fbo); + enc->glBindFramebuffer(GL_FRAMEBUFFER, fbo); + enc->glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, *tex_obj, 0); + enc->read_fbos.insert({tex, fbo}); + return fbo; +} #endif static constexpr amf_int64 macroblock_size = 16; @@ -756,6 +1451,197 @@ try { } #endif +static bool amf_encode_tex2(void *data, encoder_texture *texture, int64_t pts, + uint64_t lock_key, uint64_t *next_key, + encoder_packet *packet, bool *received_packet) +try { + UNUSED_PARAMETER(lock_key); + UNUSED_PARAMETER(next_key); + + amf_texencode *enc = (amf_texencode *)data; + handle_tex output_tex; + AMFSurfacePtr amf_surf; + AMF_RESULT res; + + if (!texture) { + throw "Encode failed: bad texture handle"; + } + + /* ------------------------------------ */ + /* get an output tex */ + + get_output_tex(enc, output_tex, texture); + + /* ------------------------------------ */ + /* copy to output tex */ + + VK_CHECK(vkWaitForFences(enc->vk->hDevice, 1, &enc->gltex.copyFence, + VK_TRUE, UINT64_MAX)); + VK_CHECK(vkResetFences(enc->vk->hDevice, 1, &enc->gltex.copyFence)); + + obs_enter_graphics(); + + GLuint sem_tex[2]; + GLenum sem_layout[2]; + for (int i = 0; i < 2; ++i) { + sem_tex[i] = enc->gltex.planes[i].gltex; + sem_layout[i] = GL_LAYOUT_TRANSFER_SRC_EXT; + } + enc->glWaitSemaphoreEXT(enc->gltex.glCopySem, 0, 0, 2, sem_tex, + sem_layout); + for (int i = 0; i < 2; ++i) { + GLuint read_fbo = get_read_fbo(enc, texture->tex[i]); + enc->glBindFramebuffer(GL_READ_FRAMEBUFFER, read_fbo); + enc->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, + enc->gltex.planes[i].fbo); + enc->glBlitFramebuffer(0, 0, enc->gltex.planes[i].width, + enc->gltex.planes[i].height, 0, 0, + enc->gltex.planes[i].width, + enc->gltex.planes[i].height, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + 
enc->glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + enc->glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + } + enc->glSignalSemaphoreEXT(enc->gltex.glsem, 0, 0, 2, sem_tex, + sem_layout); + + obs_leave_graphics(); + + res = enc->amf_context1->CreateSurfaceFromVulkanNative( + output_tex.surfaceVk, &amf_surf, enc); + if (res != AMF_OK) + throw amf_error("CreateSurfaceFromVulkanNative failed", res); + + /* ------------------------------------ */ + /* copy to submit tex */ + + VkCommandBufferBeginInfo commandBufferBegin = {}; + commandBufferBegin.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + VK_CHECK(vkBeginCommandBuffer(enc->cmdbuf, &commandBufferBegin)); + + VkImageMemoryBarrier imageBarriers[2]; + imageBarriers[0] = {}; + imageBarriers[0].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageBarriers[0].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + imageBarriers[0].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + imageBarriers[0].image = enc->gltex.planes[0].image; + imageBarriers[0].subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT; + imageBarriers[0].subresourceRange.layerCount = 1; + imageBarriers[0].subresourceRange.levelCount = 1; + imageBarriers[0].srcAccessMask = 0; + imageBarriers[0].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + imageBarriers[0].srcQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL; + imageBarriers[0].dstQueueFamilyIndex = 0; + imageBarriers[1] = {}; + imageBarriers[1].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageBarriers[1].oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + imageBarriers[1].newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + imageBarriers[1].image = enc->gltex.planes[1].image; + imageBarriers[1].subresourceRange.aspectMask = + VK_IMAGE_ASPECT_COLOR_BIT; + imageBarriers[1].subresourceRange.layerCount = 1; + imageBarriers[1].subresourceRange.levelCount = 1; + imageBarriers[1].srcAccessMask = 0; + imageBarriers[1].dstAccessMask = VK_ACCESS_MEMORY_READ_BIT; + imageBarriers[1].srcQueueFamilyIndex = 
VK_QUEUE_FAMILY_EXTERNAL; + imageBarriers[1].dstQueueFamilyIndex = 0; + vkCmdPipelineBarrier(enc->cmdbuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, + nullptr, 2, imageBarriers); + + VkImageCopy imageCopy = {}; + imageCopy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + imageCopy.srcSubresource.mipLevel = 0; + imageCopy.srcSubresource.baseArrayLayer = 0; + imageCopy.srcSubresource.layerCount = 1; + imageCopy.srcOffset.x = 0; + imageCopy.srcOffset.y = 0; + imageCopy.srcOffset.z = 0; + imageCopy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_0_BIT; + imageCopy.dstSubresource.mipLevel = 0; + imageCopy.dstSubresource.baseArrayLayer = 0; + imageCopy.dstSubresource.layerCount = 1; + imageCopy.dstOffset.x = 0; + imageCopy.dstOffset.y = 0; + imageCopy.dstOffset.z = 0; + imageCopy.extent.width = enc->gltex.planes[0].width; + imageCopy.extent.height = enc->gltex.planes[0].height; + imageCopy.extent.depth = 1; + vkCmdCopyImage(enc->cmdbuf, enc->gltex.planes[0].image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + output_tex.surfaceVk->hImage, VK_IMAGE_LAYOUT_GENERAL, 1, + &imageCopy); + + imageCopy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT; + imageCopy.extent.width = enc->gltex.planes[1].width; + imageCopy.extent.height = enc->gltex.planes[1].height; + vkCmdCopyImage(enc->cmdbuf, enc->gltex.planes[1].image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + output_tex.surfaceVk->hImage, VK_IMAGE_LAYOUT_GENERAL, 1, + &imageCopy); + + imageBarriers[0].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; + imageBarriers[0].dstAccessMask = 0; + imageBarriers[0].srcQueueFamilyIndex = 0; + imageBarriers[0].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL; + imageBarriers[1].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; + imageBarriers[1].dstAccessMask = 0; + imageBarriers[1].srcQueueFamilyIndex = 0; + imageBarriers[1].dstQueueFamilyIndex = VK_QUEUE_FAMILY_EXTERNAL; + vkCmdPipelineBarrier(enc->cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, + 
VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, 0, 0, + nullptr, 0, nullptr, 2, imageBarriers); + + VK_CHECK(vkEndCommandBuffer(enc->cmdbuf)); + + VkPipelineStageFlags waitStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + VkSubmitInfo submitInfo = {}; + submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + submitInfo.commandBufferCount = 1; + submitInfo.pCommandBuffers = &enc->cmdbuf; + submitInfo.waitSemaphoreCount = 1; + submitInfo.pWaitSemaphores = &enc->gltex.sem; + submitInfo.pWaitDstStageMask = &waitStage; + submitInfo.signalSemaphoreCount = 1; + submitInfo.pSignalSemaphores = &enc->gltex.copySem; + VK_CHECK(vkQueueSubmit(enc->queue, 1, &submitInfo, + enc->gltex.copyFence)); + + output_tex.surfaceVk->Sync.hSemaphore = enc->gltex.copySem; + output_tex.surfaceVk->Sync.bSubmitted = true; + + int64_t last_ts = convert_to_amf_ts(enc, pts - 1); + int64_t cur_ts = convert_to_amf_ts(enc, pts); + + amf_surf->SetPts(cur_ts); + amf_surf->SetProperty(L"PTS", pts); + + { + std::scoped_lock lock(enc->textures_mutex); + enc->active_textures[amf_surf.GetPtr()] = output_tex; + } + + /* ------------------------------------ */ + /* do actual encode */ + + amf_encode_base(enc, amf_surf, packet, received_packet); + return true; + +} catch (const char *err) { + amf_texencode *enc = (amf_texencode *)data; + error("%s: %s", __FUNCTION__, err); + *received_packet = false; + return false; + +} catch (const amf_error &err) { + amf_texencode *enc = (amf_texencode *)data; + error("%s: %s: %ls", __FUNCTION__, err.str, + amf_trace->GetResultText(err.res)); + *received_packet = false; + return false; +} + static buf_t alloc_buf(amf_fallback *enc) { buf_t buf; @@ -1016,6 +1902,8 @@ try { if (res != AMF_OK) throw amf_error("CreateContext failed", res); + enc->amf_context1 = AMFContext1Ptr(enc->amf_context); + enc->init(); const wchar_t *codec = nullptr; @@ -1449,7 +2337,6 @@ static void amf_avc_create_internal(amf_base *enc, obs_data_t *settings) static void *amf_avc_create_texencode(obs_data_t *settings, 
obs_encoder_t *encoder) -#ifdef _WIN32 try { check_texture_encode_capability(encoder, amf_codec_type::AVC); @@ -1457,8 +2344,10 @@ try { enc->encoder = encoder; enc->encoder_str = "texture-amf-h264"; +#ifdef _WIN32 if (!amf_init_d3d11(enc.get())) throw "Failed to create D3D11"; +#endif amf_avc_create_internal(enc.get(), settings); return enc.release(); @@ -1472,12 +2361,6 @@ try { blog(LOG_ERROR, "[texture-amf-h264] %s: %s", __FUNCTION__, err); return obs_encoder_create_rerouted(encoder, "h264_fallback_amf"); } -#else -{ - UNUSED_PARAMETER(settings); - return obs_encoder_create_rerouted(encoder, "h264_fallback_amf"); -} -#endif static void *amf_avc_create_fallback(obs_data_t *settings, obs_encoder_t *encoder) @@ -1533,6 +2416,7 @@ static void register_avc() /* FIXME: Figure out why encoder does not survive reconfiguration amf_encoder_info.update = amf_avc_update; */ amf_encoder_info.encode_texture = amf_encode_tex; + amf_encoder_info.encode_texture2 = amf_encode_tex2; amf_encoder_info.get_defaults = amf_defaults; amf_encoder_info.get_properties = amf_avc_properties; amf_encoder_info.get_extra_data = amf_extra_data; @@ -1544,6 +2428,7 @@ static void register_avc() amf_encoder_info.caps = OBS_ENCODER_CAP_INTERNAL | OBS_ENCODER_CAP_DYN_BITRATE; amf_encoder_info.encode_texture = nullptr; + amf_encoder_info.encode_texture2 = nullptr; amf_encoder_info.create = amf_avc_create_fallback; amf_encoder_info.encode = amf_encode_fallback; amf_encoder_info.get_video_info = h264_video_info_fallback; @@ -1801,7 +2686,6 @@ static void amf_hevc_create_internal(amf_base *enc, obs_data_t *settings) static void *amf_hevc_create_texencode(obs_data_t *settings, obs_encoder_t *encoder) -#ifdef _WIN32 try { check_texture_encode_capability(encoder, amf_codec_type::HEVC); @@ -1809,8 +2693,10 @@ try { enc->encoder = encoder; enc->encoder_str = "texture-amf-h265"; +#ifdef _WIN32 if (!amf_init_d3d11(enc.get())) throw "Failed to create D3D11"; +#endif amf_hevc_create_internal(enc.get(), 
settings); return enc.release(); @@ -1824,12 +2710,6 @@ try { blog(LOG_ERROR, "[texture-amf-h265] %s: %s", __FUNCTION__, err); return obs_encoder_create_rerouted(encoder, "h265_fallback_amf"); } -#else -{ - UNUSED_PARAMETER(settings); - return obs_encoder_create_rerouted(encoder, "h265_fallback_amf"); -} -#endif static void *amf_hevc_create_fallback(obs_data_t *settings, obs_encoder_t *encoder) @@ -1881,6 +2761,7 @@ static void register_hevc() /* FIXME: Figure out why encoder does not survive reconfiguration amf_encoder_info.update = amf_hevc_update; */ amf_encoder_info.encode_texture = amf_encode_tex; + amf_encoder_info.encode_texture2 = amf_encode_tex2; amf_encoder_info.get_defaults = amf_defaults; amf_encoder_info.get_properties = amf_hevc_properties; amf_encoder_info.get_extra_data = amf_extra_data; @@ -1892,6 +2773,7 @@ static void register_hevc() amf_encoder_info.caps = OBS_ENCODER_CAP_INTERNAL | OBS_ENCODER_CAP_DYN_BITRATE; amf_encoder_info.encode_texture = nullptr; + amf_encoder_info.encode_texture2 = nullptr; amf_encoder_info.create = amf_hevc_create_fallback; amf_encoder_info.encode = amf_encode_fallback; amf_encoder_info.get_video_info = h265_video_info_fallback; @@ -2110,7 +2992,6 @@ static void amf_av1_create_internal(amf_base *enc, obs_data_t *settings) static void *amf_av1_create_texencode(obs_data_t *settings, obs_encoder_t *encoder) -#ifdef _WIN32 try { check_texture_encode_capability(encoder, amf_codec_type::AV1); @@ -2118,8 +2999,10 @@ try { enc->encoder = encoder; enc->encoder_str = "texture-amf-av1"; +#ifdef _WIN32 if (!amf_init_d3d11(enc.get())) throw "Failed to create D3D11"; +#endif amf_av1_create_internal(enc.get(), settings); return enc.release(); @@ -2133,12 +3016,6 @@ try { blog(LOG_ERROR, "[texture-amf-av1] %s: %s", __FUNCTION__, err); return obs_encoder_create_rerouted(encoder, "av1_fallback_amf"); } -#else -{ - UNUSED_PARAMETER(settings); - return obs_encoder_create_rerouted(encoder, "av1_fallback_amf"); -} -#endif static void 
*amf_av1_create_fallback(obs_data_t *settings, obs_encoder_t *encoder) @@ -2200,6 +3077,7 @@ static void register_av1() /* FIXME: Figure out why encoder does not survive reconfiguration amf_encoder_info.update = amf_av1_update; */ amf_encoder_info.encode_texture = amf_encode_tex; + amf_encoder_info.encode_texture2 = amf_encode_tex2; amf_encoder_info.get_defaults = amf_av1_defaults; amf_encoder_info.get_properties = amf_av1_properties; amf_encoder_info.get_extra_data = amf_extra_data; @@ -2211,6 +3089,7 @@ static void register_av1() amf_encoder_info.caps = OBS_ENCODER_CAP_INTERNAL | OBS_ENCODER_CAP_DYN_BITRATE; amf_encoder_info.encode_texture = nullptr; + amf_encoder_info.encode_texture2 = nullptr; amf_encoder_info.create = amf_av1_create_fallback; amf_encoder_info.encode = amf_encode_fallback; amf_encoder_info.get_video_info = av1_video_info_fallback; -- 2.40.0