From 4e9f975935b4208b29e158dabe62f8ad1122a447 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 16:08:33 -0400 Subject: [PATCH 01/11] Nvdrv: Correct Async regression and avoid signaling empty buffer vsyncs --- src/core/hle/service/nvdrv/nvdrv.cpp | 4 ++-- src/core/hle/service/nvflinger/nvflinger.cpp | 8 +++++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/core/hle/service/nvdrv/nvdrv.cpp b/src/core/hle/service/nvdrv/nvdrv.cpp index 307a7e928..7bfb99e34 100644 --- a/src/core/hle/service/nvdrv/nvdrv.cpp +++ b/src/core/hle/service/nvdrv/nvdrv.cpp @@ -40,8 +40,8 @@ Module::Module(Core::System& system) { auto& kernel = system.Kernel(); for (u32 i = 0; i < MaxNvEvents; i++) { std::string event_label = fmt::format("NVDRV::NvEvent_{}", i); - events_interface.events[i] = Kernel::WritableEvent::CreateEventPair( - kernel, Kernel::ResetType::Automatic, event_label); + events_interface.events[i] = + Kernel::WritableEvent::CreateEventPair(kernel, Kernel::ResetType::Manual, event_label); events_interface.status[i] = EventState::Free; events_interface.registered[i] = false; } diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 2e4d707b9..3b251f8c8 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -170,8 +170,13 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { void NVFlinger::Compose() { for (auto& display : displays) { + bool trigger_event = false; // Trigger vsync for this display at the end of drawing - SCOPE_EXIT({ display.SignalVSyncEvent(); }); + SCOPE_EXIT({ + if (trigger_event) { + display.SignalVSyncEvent(); + } + }); // Don't do anything for displays without layers. if (!display.HasLayers()) @@ -194,6 +199,7 @@ void NVFlinger::Compose() { } const auto& igbp_buffer = buffer->get().igbp_buffer; + trigger_event = true; // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. The display device selection is probably based From 5b5e60ffeca1a718cd980e74f0528d6ab91788cf Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 25 Sep 2019 19:43:23 -0400 Subject: [PATCH 02/11] GPU_Async: Correct fences, display events and more. This commit uses guest fences on vSync event instead of an articial fake fence we had. It also corrects to keep signaling display events while loading the game as the OS is suppose to send buffers to vSync during that time. --- src/core/hle/service/nvflinger/nvflinger.cpp | 21 ++++++++++++++++++-- src/core/hle/service/nvflinger/nvflinger.h | 2 ++ src/video_core/gpu.cpp | 13 ++++++++++++ src/video_core/gpu.h | 3 +++ src/video_core/gpu_thread.cpp | 14 +------------ src/video_core/gpu_thread.h | 6 ------ 6 files changed, 38 insertions(+), 21 deletions(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3b251f8c8..86a90526c 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -36,6 +36,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(3, "Internal", system); displays.emplace_back(4, "Null", system); + for (auto& display : displays) { + display.SignalVSyncEvent(); + } + // Schedule the screen composition events composition_event = system.CoreTiming().RegisterEvent( "ScreenComposition", [this](u64 userdata, s64 cycles_late) { @@ -173,7 +177,13 @@ void NVFlinger::Compose() { bool trigger_event = false; // Trigger vsync for this display at the end of drawing SCOPE_EXIT({ - if (trigger_event) { + // TODO(Blinkhawk): Correctly send buffers through nvflinger while + // loading the game thorugh the OS. + // During loading, the OS takes care of sending buffers to vsync, + // thus it triggers, since this is not properly emulated due to + // HLE complications, we allow it to signal until the game enqueues + // it's first buffer. + if (trigger_event || !first_buffer_enqueued) { display.SignalVSyncEvent(); } }); @@ -193,13 +203,20 @@ void NVFlinger::Compose() { if (!buffer) { // There was no queued buffer to draw, render previous frame - system.GetPerfStats().EndGameFrame(); system.GPU().SwapBuffers({}); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; trigger_event = true; + first_buffer_enqueued = true; + + const auto& gpu = system.GPU(); + const auto& multi_fence = buffer->get().multi_fence; + for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { + const auto& fence = multi_fence.fences[fence_id]; + gpu.WaitFence(fence.id, fence.value); + } // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. The display device selection is probably based diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 5d7e3bfb8..95d7278f5 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -102,6 +102,8 @@ private: u32 swap_interval = 1; + bool first_buffer_enqueued{}; + /// Event that handles screen composition. Core::Timing::EventType* composition_event; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..d94be9c9d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // Synced GPU, is always in sync + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load() < value) { + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..e20b0687a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. + void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..d7048b6ae 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) { return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..108f456bd 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue; CommandQueue queue; u64 last_fence{}; @@ -128,7 +123,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; }; From 976d9ef43c6eb431a1963a2e5857c100eeacd952 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 08:46:22 -0400 Subject: [PATCH 03/11] NvFlinger: Don't swap buffers if a frame is missing and always trigger event in sync gpu. --- src/core/hle/service/nvflinger/nvflinger.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 86a90526c..3c08ac9a3 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -203,7 +203,9 @@ void NVFlinger::Compose() { if (!buffer) { // There was no queued buffer to draw, render previous frame - system.GPU().SwapBuffers({}); + auto& gpu = system.GPU(); + // Always trigger on sync GPU. + trigger_event = !gpu.IsAsync(); continue; } From ffc2ce89a03d8160c408922cd72a1f45e333c0fe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 10:12:05 -0400 Subject: [PATCH 04/11] Nvdrv: Do framelimiting only in the CPU Thread --- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 4 ++++ src/video_core/renderer_opengl/renderer_opengl.cpp | 3 --- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index f764388bc..3f7b8e670 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -5,6 +5,7 @@ #include "common/assert.h" #include "common/logging/log.h" #include "core/core.h" +#include "core/core_timing.h" #include "core/hle/service/nvdrv/devices/nvdisp_disp0.h" #include "core/hle/service/nvdrv/devices/nvmap.h" #include "core/perf_stats.h" @@ -38,7 +39,10 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 transform, crop_rect}; system.GetPerfStats().EndGameFrame(); + system.GetPerfStats().EndSystemFrame(); system.GPU().SwapBuffers(&framebuffer); + system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); + system.GetPerfStats().BeginSystemFrame(); } } // namespace Service::Nvidia::Devices diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 1e6ef66ab..7f6ff0857 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -135,9 +135,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { render_window.PollEvents(); - system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs()); - system.GetPerfStats().BeginSystemFrame(); - // Restore the rasterizer state prev_state.AllDirty(); prev_state.Apply(); From 782b7a0ca45b08d981e48a6d9a7af73cbed13281 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 15:41:10 -0400 Subject: [PATCH 05/11] NVFlinger: Reverse the change that only signaled events on buffer acquire. This has been hardware tested and it seems that NVFlinger will still signal even if there are no buffers to present. --- src/core/hle/service/nvflinger/nvflinger.cpp | 19 +------------------ src/core/hle/service/nvflinger/nvflinger.h | 2 -- 2 files changed, 1 insertion(+), 20 deletions(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3c08ac9a3..6eee20efe 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -174,19 +174,8 @@ const VI::Layer* NVFlinger::FindLayer(u64 display_id, u64 layer_id) const { void NVFlinger::Compose() { for (auto& display : displays) { - bool trigger_event = false; // Trigger vsync for this display at the end of drawing - SCOPE_EXIT({ - // TODO(Blinkhawk): Correctly send buffers through nvflinger while - // loading the game thorugh the OS. - // During loading, the OS takes care of sending buffers to vsync, - // thus it triggers, since this is not properly emulated due to - // HLE complications, we allow it to signal until the game enqueues - // it's first buffer. - if (trigger_event || !first_buffer_enqueued) { - display.SignalVSyncEvent(); - } - }); + SCOPE_EXIT({ display.SignalVSyncEvent(); }); // Don't do anything for displays without layers. if (!display.HasLayers()) @@ -202,16 +191,10 @@ void NVFlinger::Compose() { MicroProfileFlip(); if (!buffer) { - // There was no queued buffer to draw, render previous frame - auto& gpu = system.GPU(); - // Always trigger on sync GPU. - trigger_event = !gpu.IsAsync(); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; - trigger_event = true; - first_buffer_enqueued = true; const auto& gpu = system.GPU(); const auto& multi_fence = buffer->get().multi_fence; diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 95d7278f5..5d7e3bfb8 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -102,8 +102,6 @@ private: u32 swap_interval = 1; - bool first_buffer_enqueued{}; - /// Event that handles screen composition. Core::Timing::EventType* composition_event; From 69fa2e652560dd72f7b53d44fec9d7fe4aa0ffb9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 18:11:27 -0400 Subject: [PATCH 06/11] Nvdrv: Correct Event setup in Nvdrv Events are supposed to be cleared on quering. This fixes that issue. --- .../hle/service/nvdrv/devices/nvhost_ctrl.cpp | 33 +++++++------------ src/core/hle/service/nvdrv/interface.cpp | 4 ++- 2 files changed, 14 insertions(+), 23 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp index ff6b1abae..708469bd5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_ctrl.cpp @@ -62,16 +62,26 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector& return NvResult::BadParameter; } + u32 event_id = params.value & 0x00FF; + + if (event_id >= MaxNvEvents) { + std::memcpy(output.data(), ¶ms, sizeof(params)); + return NvResult::BadParameter; + } + + auto event = events_interface.events[event_id]; auto& gpu = system.GPU(); // This is mostly to take into account unimplemented features. As synced // gpu is always synced. if (!gpu.IsAsync()) { + event.writable->Signal(); return NvResult::Success; } auto lock = gpu.LockSync(); const u32 current_syncpoint_value = gpu.GetSyncpointValue(params.syncpt_id); const s32 diff = current_syncpoint_value - params.threshold; if (diff >= 0) { + event.writable->Signal(); params.value = current_syncpoint_value; std::memcpy(output.data(), ¶ms, sizeof(params)); return NvResult::Success; @@ -87,27 +97,6 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector& return NvResult::Timeout; } - u32 event_id; - if (is_async) { - event_id = params.value & 0x00FF; - if (event_id >= MaxNvEvents) { - std::memcpy(output.data(), ¶ms, sizeof(params)); - return NvResult::BadParameter; - } - } else { - if (ctrl.fresh_call) { - const auto result = events_interface.GetFreeEvent(); - if (result) { - event_id = *result; - } else { - LOG_CRITICAL(Service_NVDRV, "No Free Events available!"); - event_id = params.value & 0x00FF; - } - } else { - event_id = ctrl.event_id; - } - } - EventState status = events_interface.status[event_id]; if (event_id < MaxNvEvents || status == EventState::Free || status == EventState::Registered) { events_interface.SetEventStatus(event_id, EventState::Waiting); @@ -119,7 +108,7 @@ u32 nvhost_ctrl::IocCtrlEventWait(const std::vector& input, std::vector& params.value = ((params.syncpt_id & 0xfff) << 16) | 0x10000000; } params.value |= event_id; - events_interface.events[event_id].writable->Clear(); + event.writable->Clear(); gpu.RegisterSyncptInterrupt(params.syncpt_id, target_value); if (!is_async && ctrl.fresh_call) { ctrl.must_delay = true; diff --git a/src/core/hle/service/nvdrv/interface.cpp b/src/core/hle/service/nvdrv/interface.cpp index 5e0c23602..68d139cfb 100644 --- a/src/core/hle/service/nvdrv/interface.cpp +++ b/src/core/hle/service/nvdrv/interface.cpp @@ -134,7 +134,9 @@ void NVDRV::QueryEvent(Kernel::HLERequestContext& ctx) { IPC::ResponseBuilder rb{ctx, 3, 1}; rb.Push(RESULT_SUCCESS); if (event_id < MaxNvEvents) { - rb.PushCopyObjects(nvdrv->GetEvent(event_id)); + auto event = nvdrv->GetEvent(event_id); + event->Clear(); + rb.PushCopyObjects(event); rb.Push(NvResult::Success); } else { rb.Push(0); From 3f104464dec13f9ba90eaca5dafca87ee4116a60 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 26 Sep 2019 19:08:22 -0400 Subject: [PATCH 07/11] Core: Wait for GPU to be idle before shutting down. --- src/core/core.cpp | 2 ++ src/video_core/gpu.h | 3 +++ src/video_core/gpu_asynch.cpp | 4 ++++ src/video_core/gpu_asynch.h | 1 + src/video_core/gpu_synch.h | 1 + src/video_core/gpu_thread.cpp | 5 +++++ src/video_core/gpu_thread.h | 3 +++ 7 files changed, 19 insertions(+) diff --git a/src/core/core.cpp b/src/core/core.cpp index 75a7ffb97..8f68bdbad 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -252,6 +252,8 @@ struct System::Impl { is_powered_on = false; exit_lock = false; + gpu_core->WaitIdle(); + // Shutdown emulation session renderer.reset(); GDBStub::Shutdown(); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index e20b0687a..dbca19f35 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + // Waits for the GPU to finish working + virtual void WaitIdle() const = 0; + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. void WaitFence(u32 syncpoint_id, u32 value) const; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index f2a3a390e..04222d060 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value); } +void GPUAsynch::WaitIdle() const { + gpu_thread.WaitIdle(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index a12f9bac4..1241ade1d 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -25,6 +25,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override; protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h index 5eb1c461c..c71baee89 100644 --- a/src/video_core/gpu_synch.h +++ b/src/video_core/gpu_synch.h @@ -24,6 +24,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void WaitIdle() const override {} protected: void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id, diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index d7048b6ae..4a42634d2 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -90,6 +90,11 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void ThreadManager::WaitIdle() const { + while (state.last_fence > state.signaled_fence.load()) { + } +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 108f456bd..08dc96bb3 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -116,6 +116,9 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated void FlushAndInvalidateRegion(CacheAddr addr, u64 size); + // Wait until the gpu thread is idle. + void WaitIdle() const; + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); From 9f2719d1a43e04cc2f5296d0f9719aab9626f730 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 27 Sep 2019 09:39:42 -0400 Subject: [PATCH 08/11] Gl_Rasterizer: Protect CPU Memory mapping from multiple threads. --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 1 + src/video_core/renderer_opengl/gl_rasterizer.h | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6a17bed72..deb3e10a5 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -348,6 +348,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { } void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + std::lock_guard lock{pages_mutex}; const u64 page_start{addr >> Memory::PAGE_BITS}; const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 9c10ebda3..c24a02d71 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -230,6 +231,8 @@ private: using CachedPageMap = boost::icl::interval_map; CachedPageMap cached_pages; + + std::mutex pages_mutex; }; } // namespace OpenGL From 75395605d69f3515c72437135ebafee80eba775c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 27 Sep 2019 12:00:34 -0400 Subject: [PATCH 09/11] NvFlinger: Remove leftover from corrections and clang format. --- src/core/hle/service/nvflinger/nvflinger.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 6eee20efe..e00b665ea 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -36,10 +36,6 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(3, "Internal", system); displays.emplace_back(4, "Null", system); - for (auto& display : displays) { - display.SignalVSyncEvent(); - } - // Schedule the screen composition events composition_event = system.CoreTiming().RegisterEvent( "ScreenComposition", [this](u64 userdata, s64 cycles_late) { From 538f5880fff5e29fd5539eea371c3131013908e4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 29 Sep 2019 10:12:28 -0400 Subject: [PATCH 10/11] GL_Renderer: Remove lefting snippet. --- src/video_core/renderer_opengl/renderer_opengl.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 7f6ff0857..4bbd17b12 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - system.GetPerfStats().EndSystemFrame(); - // Maintain the rasterizer's state as a priority OpenGLState prev_state = OpenGLState::GetCurState(); state.AllDirty(); From cfc2f30dc409cbbb36c19c74a98f3017e6d722f2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 11 Oct 2019 13:41:15 -0400 Subject: [PATCH 11/11] AsyncGpu: Address Feedback --- src/video_core/gpu.cpp | 2 +- src/video_core/gpu_thread.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index d94be9c9d..da7359d4d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -72,7 +72,7 @@ void GPU::WaitFence(u32 syncpoint_id, u32 value) const { return; } MICROPROFILE_SCOPE(GPU_wait); - while (syncpoints[syncpoint_id].load() < value) { + while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) { } } diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 4a42634d2..758a37f14 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -91,7 +91,7 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { } void ThreadManager::WaitIdle() const { - while (state.last_fence > state.signaled_fence.load()) { + while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) { } }