Merge pull request #84 from bunnei/fix-hw-synchronization

Fix GPU/HW synchronization
2014-08-31 00:53:07 -04:00 · 2014-08-31 00:53:07 -04:00 · 76372feb19
parent 038a51aac1 aabfcfe6ad
commit 76372feb19
4 changed files with 51 additions and 34 deletions
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) {
        return -1;
    }
    while(true) {
        Core::RunLoop();
    }
    delete emu_window;
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@ -6,6 +6,8 @@
 #include "common/log.h"
 #include "common/symbols.h"
 #include "video_core/video_core.h"
 #include "core/core.h"
 #include "core/mem_map.h"
 #include "core/hw/hw.h"
@ -24,29 +26,17 @@ ARM_Interface*  g_app_core      = nullptr;  ///< ARM11 application core
 ARM_Interface*  g_sys_core      = nullptr;  ///< ARM11 system (OS) core
 /// Run the core CPU loop
-void RunLoop() {
+void RunLoop(int tight_loop) {
-    for (;;){
+    g_app_core->Run(tight_loop);
        // This function loops for 100 instructions in the CPU before trying to update hardware.
        // This is a little bit faster than SingleStep, and should be pretty much equivalent. The 
        // number of instructions chosen is fairly arbitrary, however a large number will more 
        // drastically affect the frequency of GSP interrupts and likely break things. The point of
        // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
        // it a little bit faster...
        g_app_core->Run(100);
    HW::Update();
    if (HLE::g_reschedule) {
        Kernel::Reschedule();
    }
    }
 }
 /// Step the CPU one instruction
 void SingleStep() {
-    g_app_core->Step();
+    RunLoop(1);
    HW::Update();
    if (HLE::g_reschedule) {
        Kernel::Reschedule();
    }
 }
 /// Halt the core
--- a/src/core/core.h
+++ b/src/core/core.h
@ -19,8 +19,15 @@ extern ARM_Interface*   g_sys_core;     ///< ARM11 system (OS) core
 /// Start the core
 void Start();
-/// Run the core CPU loop
+/**
-void RunLoop();
+ * Run the core CPU loop
 * This function loops for 100 instructions in the CPU before trying to update hardware. This is a
 * little bit faster than SingleStep, and should be pretty much equivalent. The number of
 * instructions chosen is fairly arbitrary, however a large number will more drastically affect the
 * frequency of GSP interrupts and likely break things. The point of this is to just loop in the CPU
 * for more than 1 instruction to reduce overhead and make it a little bit faster...
 */
 void RunLoop(int tight_loop=100);
 /// Step the CPU one instruction
 void SingleStep();
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@ -24,6 +24,7 @@ Regs g_regs;
 u32 g_cur_line = 0;         ///< Current vertical screen line
 u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
 u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame
 template <typename T>
 inline void Read(T &var, const u32 raw_addr) {
@ -179,6 +180,25 @@ void Update() {
    auto& framebuffer_top = g_regs.framebuffer_config[0];
    u64 current_ticks = Core::g_app_core->GetTicks();
    // Update the frame after a certain number of CPU ticks have elapsed. This assumes that the
    // active frame in memory is always complete to render. There also may be issues with this
    // becoming out-of-sync with GSP synchronization code (as follows). At this time, this seems to
    // be the most effective solution for both homebrew and retail applications. With retail, this
    // could be moved below (and probably would guarantee more accurate synchronization). However,
    // primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a
    // threading reschedule).
    if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) {
        VideoCore::g_renderer->SwapBuffers();
        g_last_frame_ticks = current_ticks;
    }
    // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical
    // blank, we need to simulate it. Based on testing, it seems that retail applications work more
    // accurately when this is signalled between thread switches.
    if (HLE::g_reschedule) {
        // Synchronize line...
        if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
            GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
@ -190,16 +210,14 @@ void Update() {
        if (g_cur_line >= framebuffer_top.height) {
            g_cur_line = 0;
            GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
-        VideoCore::g_renderer->SwapBuffers();
+        }
        Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
        HLE::Reschedule(__func__);
    }
 }
 /// Initialize hardware
 void Init() {
    g_cur_line = 0;
-    g_last_line_ticks = Core::g_app_core->GetTicks();
+    g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks();
    auto& framebuffer_top = g_regs.framebuffer_config[0];
    auto& framebuffer_sub = g_regs.framebuffer_config[1];