Merge pull request #3395 from FernandoS27/queries
GPU: Refactor queries implementation and correct GPU Clock.
This commit is contained in:
commit
3563af2364
|
@ -9,6 +9,7 @@
|
|||
#include "core/core_timing.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/shader_type.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
@ -519,61 +520,63 @@ void Maxwell3D::ProcessFirmwareCall4() {
|
|||
regs.reg_array[0xd00] = 1;
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
|
||||
struct LongQueryResult {
|
||||
u64_le value;
|
||||
u64_le timestamp;
|
||||
};
|
||||
static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
|
||||
const GPUVAddr sequence_address{regs.query.QueryAddress()};
|
||||
// Since the sequence address is given as a GPU VAddr, we have to convert it to an application
|
||||
// VAddr before writing.
|
||||
if (long_query) {
|
||||
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
|
||||
// GPU, this command may actually take a while to complete in real hardware due to GPU
|
||||
// wait queues.
|
||||
LongQueryResult query_result{payload, system.GPU().GetTicks()};
|
||||
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
|
||||
} else {
|
||||
memory_manager.Write<u32>(sequence_address, static_cast<u32>(payload));
|
||||
}
|
||||
}
|
||||
|
||||
void Maxwell3D::ProcessQueryGet() {
|
||||
// TODO(Subv): Support the other query units.
|
||||
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
|
||||
"Units other than CROP are unimplemented");
|
||||
|
||||
u64 result = 0;
|
||||
|
||||
// TODO(Subv): Support the other query variables
|
||||
switch (regs.query.query_get.operation) {
|
||||
case Regs::QueryOperation::Release: {
|
||||
const u64 result = regs.query.query_sequence;
|
||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
||||
break;
|
||||
}
|
||||
case Regs::QueryOperation::Acquire: {
|
||||
// Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
|
||||
// to write a value that matches the current payload.
|
||||
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
||||
break;
|
||||
}
|
||||
case Regs::QueryOperation::Counter: {
|
||||
u64 result{};
|
||||
switch (regs.query.query_get.select) {
|
||||
case Regs::QuerySelect::Zero:
|
||||
// This seems to actually write the query sequence to the query address.
|
||||
result = regs.query.query_sequence;
|
||||
result = 0;
|
||||
break;
|
||||
default:
|
||||
result = 1;
|
||||
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
||||
static_cast<u32>(regs.query.query_get.select.Value()));
|
||||
}
|
||||
|
||||
// TODO(Subv): Research and implement how query sync conditions work.
|
||||
|
||||
struct LongQueryResult {
|
||||
u64_le value;
|
||||
u64_le timestamp;
|
||||
};
|
||||
static_assert(sizeof(LongQueryResult) == 16, "LongQueryResult has wrong size");
|
||||
|
||||
switch (regs.query.query_get.mode) {
|
||||
case Regs::QueryMode::Write:
|
||||
case Regs::QueryMode::Write2: {
|
||||
u32 sequence = regs.query.query_sequence;
|
||||
if (regs.query.query_get.short_query) {
|
||||
// Write the current query sequence to the sequence address.
|
||||
// TODO(Subv): Find out what happens if you use a long query type but mark it as a short
|
||||
// query.
|
||||
memory_manager.Write<u32>(sequence_address, sequence);
|
||||
} else {
|
||||
// Write the 128-bit result structure in long mode. Note: We emulate an infinitely fast
|
||||
// GPU, this command may actually take a while to complete in real hardware due to GPU
|
||||
// wait queues.
|
||||
LongQueryResult query_result{};
|
||||
query_result.value = result;
|
||||
// TODO(Subv): Generate a real GPU timestamp and write it here instead of CoreTiming
|
||||
query_result.timestamp = system.CoreTiming().GetTicks();
|
||||
memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
|
||||
}
|
||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
||||
break;
|
||||
}
|
||||
case Regs::QueryOperation::Trap: {
|
||||
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
UNIMPLEMENTED_MSG("Unknown query operation");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Query mode {} not implemented",
|
||||
static_cast<u32>(regs.query.query_get.mode.Value()));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -71,12 +71,11 @@ public:
|
|||
static constexpr std::size_t MaxConstBuffers = 18;
|
||||
static constexpr std::size_t MaxConstBufferSize = 0x10000;
|
||||
|
||||
enum class QueryMode : u32 {
|
||||
Write = 0,
|
||||
Sync = 1,
|
||||
// TODO(Subv): It is currently unknown what the difference between method 2 and method 0
|
||||
// is.
|
||||
Write2 = 2,
|
||||
enum class QueryOperation : u32 {
|
||||
Release = 0,
|
||||
Acquire = 1,
|
||||
Counter = 2,
|
||||
Trap = 3,
|
||||
};
|
||||
|
||||
enum class QueryUnit : u32 {
|
||||
|
@ -1081,7 +1080,7 @@ public:
|
|||
u32 query_sequence;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 2, QueryMode> mode;
|
||||
BitField<0, 2, QueryOperation> operation;
|
||||
BitField<4, 1, u32> fence;
|
||||
BitField<12, 4, QueryUnit> unit;
|
||||
BitField<16, 1, QuerySyncCondition> sync_cond;
|
||||
|
@ -1413,6 +1412,9 @@ private:
|
|||
/// Handles a write to the QUERY_GET register.
|
||||
void ProcessQueryGet();
|
||||
|
||||
// Writes the query result accordingly
|
||||
void StampQueryResult(u64 payload, bool long_query);
|
||||
|
||||
// Handles Conditional Rendering
|
||||
void ProcessQueryCondition();
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "common/microprofile.h"
|
||||
#include "core/core.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
|
@ -122,6 +123,19 @@ bool GPU::CancelSyncptInterrupt(const u32 syncpoint_id, const u32 value) {
|
|||
return true;
|
||||
}
|
||||
|
||||
u64 GPU::GetTicks() const {
|
||||
// This values were reversed engineered by fincs from NVN
|
||||
// The gpu clock is reported in units of 385/625 nanoseconds
|
||||
constexpr u64 gpu_ticks_num = 384;
|
||||
constexpr u64 gpu_ticks_den = 625;
|
||||
|
||||
const u64 cpu_ticks = system.CoreTiming().GetTicks();
|
||||
const u64 nanoseconds = Core::Timing::CyclesToNs(cpu_ticks).count();
|
||||
const u64 nanoseconds_num = nanoseconds / gpu_ticks_den;
|
||||
const u64 nanoseconds_rem = nanoseconds % gpu_ticks_den;
|
||||
return nanoseconds_num * gpu_ticks_num + (nanoseconds_rem * gpu_ticks_num) / gpu_ticks_den;
|
||||
}
|
||||
|
||||
void GPU::FlushCommands() {
|
||||
renderer.Rasterizer().FlushCommands();
|
||||
}
|
||||
|
@ -340,7 +354,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
|
|||
block.sequence = regs.semaphore_sequence;
|
||||
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
|
||||
// CoreTiming
|
||||
block.timestamp = system.CoreTiming().GetTicks();
|
||||
block.timestamp = GetTicks();
|
||||
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
|
||||
sizeof(block));
|
||||
} else {
|
||||
|
|
|
@ -192,6 +192,8 @@ public:
|
|||
|
||||
bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
|
||||
|
||||
u64 GetTicks() const;
|
||||
|
||||
std::unique_lock<std::mutex> LockSync() {
|
||||
return std::unique_lock{sync_mutex};
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue