gl_query_cache: Implement host queries using a deferred cache
Instead of waiting immediately for executed commands, defer the query until the guest CPU reads it. This way we get closer to what the guest program is doing. To achieve this we have to build a dependency queue, because host APIs (like OpenGL and Vulkan) use ranged queries instead of counters like NVN. Waiting for queries implicitly uses fences, and this requires a command to be queued; otherwise the driver will lock, waiting until a timeout. To fix this when there are no commands queued, we explicitly call glFlush.
This commit is contained in:
parent
ef9920e164
commit
aae8c180cb
|
@ -556,23 +556,13 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
// matches the current payload.
|
// matches the current payload.
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
||||||
break;
|
break;
|
||||||
case Regs::QueryOperation::Counter: {
|
case Regs::QueryOperation::Counter:
|
||||||
u64 result;
|
if (const std::optional<u64> result = GetQueryResult()) {
|
||||||
switch (regs.query.query_get.select) {
|
// If the query returns an empty optional it means it's cached and deferred.
|
||||||
case Regs::QuerySelect::Zero:
|
// In this case we have a non-empty result, so we stamp it immediately.
|
||||||
result = 0;
|
StampQueryResult(*result, regs.query.query_get.short_query == 0);
|
||||||
break;
|
|
||||||
case Regs::QuerySelect::SamplesPassed:
|
|
||||||
result = rasterizer.Query(VideoCore::QueryType::SamplesPassed);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
result = 1;
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
|
||||||
static_cast<u32>(regs.query.query_get.select.Value()));
|
|
||||||
}
|
}
|
||||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case Regs::QueryOperation::Trap:
|
case Regs::QueryOperation::Trap:
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
||||||
break;
|
break;
|
||||||
|
@ -595,20 +585,20 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||||
}
|
}
|
||||||
case Regs::ConditionMode::ResNonZero: {
|
case Regs::ConditionMode::ResNonZero: {
|
||||||
Regs::QueryCompare cmp;
|
Regs::QueryCompare cmp;
|
||||||
memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
|
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||||
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
|
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Regs::ConditionMode::Equal: {
|
case Regs::ConditionMode::Equal: {
|
||||||
Regs::QueryCompare cmp;
|
Regs::QueryCompare cmp;
|
||||||
memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
|
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||||
execute_on =
|
execute_on =
|
||||||
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
|
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Regs::ConditionMode::NotEqual: {
|
case Regs::ConditionMode::NotEqual: {
|
||||||
Regs::QueryCompare cmp;
|
Regs::QueryCompare cmp;
|
||||||
memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
|
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||||
execute_on =
|
execute_on =
|
||||||
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
|
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
|
||||||
break;
|
break;
|
||||||
|
@ -674,6 +664,21 @@ void Maxwell3D::DrawArrays() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||||
|
switch (regs.query.query_get.select) {
|
||||||
|
case Regs::QuerySelect::Zero:
|
||||||
|
return 0;
|
||||||
|
case Regs::QuerySelect::SamplesPassed:
|
||||||
|
// Deferred.
|
||||||
|
rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed);
|
||||||
|
return {};
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
||||||
|
static_cast<u32>(regs.query.query_get.select.Value()));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
|
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
|
||||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||||
auto& shader = state.shader_stages[stage_index];
|
auto& shader = state.shader_stages[stage_index];
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
#include <optional>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -1462,6 +1463,9 @@ private:
|
||||||
|
|
||||||
// Handles a instance drawcall from MME
|
// Handles a instance drawcall from MME
|
||||||
void StepInstance(MMEDrawMode expected_mode, u32 count);
|
void StepInstance(MMEDrawMode expected_mode, u32 count);
|
||||||
|
|
||||||
|
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||||
|
std::optional<u64> GetQueryResult();
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ASSERT_REG_POSITION(field_name, position) \
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
|
|
|
@ -20,6 +20,7 @@ namespace VideoCore {
|
||||||
enum class QueryType {
|
enum class QueryType {
|
||||||
SamplesPassed,
|
SamplesPassed,
|
||||||
};
|
};
|
||||||
|
constexpr std::size_t NumQueryTypes = 1;
|
||||||
|
|
||||||
enum class LoadCallbackStage {
|
enum class LoadCallbackStage {
|
||||||
Prepare,
|
Prepare,
|
||||||
|
@ -48,8 +49,8 @@ public:
|
||||||
/// Resets the counter of a query
|
/// Resets the counter of a query
|
||||||
virtual void ResetCounter(QueryType type) = 0;
|
virtual void ResetCounter(QueryType type) = 0;
|
||||||
|
|
||||||
/// Returns the value of a GPU query
|
/// Records a GPU query and caches it
|
||||||
virtual u64 Query(QueryType type) = 0;
|
virtual void Query(GPUVAddr gpu_addr, QueryType type) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
|
@ -2,58 +2,203 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <glad/glad.h>
|
#include <glad/glad.h>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "core/core.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_query_cache.h"
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
HostCounter::HostCounter(GLenum target) {
|
using VideoCore::QueryType;
|
||||||
query.Create(target);
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
||||||
|
|
||||||
|
constexpr GLenum GetTarget(QueryType type) {
|
||||||
|
return QueryTargets[static_cast<std::size_t>(type)];
|
||||||
}
|
}
|
||||||
|
|
||||||
HostCounter::~HostCounter() = default;
|
} // Anonymous namespace
|
||||||
|
|
||||||
void HostCounter::UpdateState(bool enabled) {
|
CounterStream::CounterStream(QueryCache& cache, QueryType type)
|
||||||
|
: cache{cache}, type{type}, target{GetTarget(type)} {}
|
||||||
|
|
||||||
|
CounterStream::~CounterStream() = default;
|
||||||
|
|
||||||
|
void CounterStream::Update(bool enabled, bool any_command_queued) {
|
||||||
if (enabled) {
|
if (enabled) {
|
||||||
Enable();
|
if (!current) {
|
||||||
} else {
|
current = cache.GetHostCounter(last, type);
|
||||||
Disable();
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (current) {
|
||||||
|
EndQuery(any_command_queued);
|
||||||
|
}
|
||||||
|
last = std::exchange(current, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void CounterStream::Reset(bool any_command_queued) {
|
||||||
|
if (current) {
|
||||||
|
EndQuery(any_command_queued);
|
||||||
|
}
|
||||||
|
current = nullptr;
|
||||||
|
last = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
|
||||||
|
if (!current) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
EndQuery(any_command_queued);
|
||||||
|
last = std::move(current);
|
||||||
|
current = cache.GetHostCounter(last, type);
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CounterStream::EndQuery(bool any_command_queued) {
|
||||||
|
if (!any_command_queued) {
|
||||||
|
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||||
|
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||||
|
// for this. Insert to the OpenGL command stream a flush.
|
||||||
|
glFlush();
|
||||||
|
}
|
||||||
|
glEndQuery(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
|
||||||
|
: RasterizerCache{rasterizer}, system{system},
|
||||||
|
rasterizer{rasterizer}, streams{{CounterStream{*this, QueryType::SamplesPassed}}} {}
|
||||||
|
|
||||||
|
QueryCache::~QueryCache() = default;
|
||||||
|
|
||||||
|
void QueryCache::Query(GPUVAddr gpu_addr, QueryType type) {
|
||||||
|
auto& memory_manager = system.GPU().MemoryManager();
|
||||||
|
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||||
|
|
||||||
|
auto query = TryGet(host_ptr);
|
||||||
|
if (!query) {
|
||||||
|
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||||
|
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||||
|
|
||||||
|
query = std::make_shared<CachedQuery>(type, *cpu_addr, host_ptr);
|
||||||
|
Register(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()));
|
||||||
|
query->MarkAsModified(true, *this);
|
||||||
|
}
|
||||||
|
|
||||||
|
void QueryCache::UpdateCounters() {
|
||||||
|
auto& samples_passed = GetStream(QueryType::SamplesPassed);
|
||||||
|
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
|
||||||
|
}
|
||||||
|
|
||||||
|
void QueryCache::ResetCounter(QueryType type) {
|
||||||
|
GetStream(type).Reset(rasterizer.AnyCommandQueued());
|
||||||
|
}
|
||||||
|
|
||||||
|
void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
|
||||||
|
reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
|
||||||
|
}
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
|
||||||
|
QueryType type) {
|
||||||
|
const auto type_index = static_cast<std::size_t>(type);
|
||||||
|
auto& reserve = reserved_queries[type_index];
|
||||||
|
|
||||||
|
if (reserve.empty()) {
|
||||||
|
return std::make_shared<HostCounter>(*this, std::move(dependency), type);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto counter = std::make_shared<HostCounter>(*this, std::move(dependency), type,
|
||||||
|
std::move(reserve.back()));
|
||||||
|
reserve.pop_back();
|
||||||
|
return counter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void QueryCache::FlushObjectInner(const std::shared_ptr<CachedQuery>& counter_) {
|
||||||
|
auto& counter = *counter_;
|
||||||
|
auto& stream = GetStream(counter.GetType());
|
||||||
|
|
||||||
|
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||||
|
// To avoid this disable and re-enable keeping the dependency stream.
|
||||||
|
const bool is_enabled = stream.IsEnabled();
|
||||||
|
if (is_enabled) {
|
||||||
|
stream.Update(false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
counter.Flush();
|
||||||
|
|
||||||
|
if (is_enabled) {
|
||||||
|
stream.Update(true, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HostCounter::Reset() {
|
CounterStream& QueryCache::GetStream(QueryType type) {
|
||||||
counter = 0;
|
return streams[static_cast<std::size_t>(type)];
|
||||||
Disable();
|
}
|
||||||
|
|
||||||
|
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type)
|
||||||
|
: cache{cache}, type{type}, dependency{std::move(dependency)} {
|
||||||
|
const GLenum target = GetTarget(type);
|
||||||
|
query.Create(target);
|
||||||
|
glBeginQuery(target, query.handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
|
||||||
|
OGLQuery&& query_)
|
||||||
|
: cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
|
||||||
|
glBeginQuery(GetTarget(type), query.handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
HostCounter::~HostCounter() {
|
||||||
|
cache.Reserve(type, std::move(query));
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 HostCounter::Query() {
|
u64 HostCounter::Query() {
|
||||||
if (!is_beginned) {
|
if (query.handle == 0) {
|
||||||
return counter;
|
return result;
|
||||||
}
|
}
|
||||||
Disable();
|
|
||||||
u64 value;
|
|
||||||
glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
|
|
||||||
Enable();
|
|
||||||
|
|
||||||
counter += value;
|
glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &result);
|
||||||
return counter;
|
|
||||||
|
if (dependency) {
|
||||||
|
result += dependency->Query();
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void HostCounter::Enable() {
|
CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
|
||||||
if (is_beginned) {
|
: RasterizerCacheObject{host_ptr}, type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
|
||||||
return;
|
|
||||||
}
|
CachedQuery::~CachedQuery() = default;
|
||||||
is_beginned = true;
|
|
||||||
glBeginQuery(GL_SAMPLES_PASSED, query.handle);
|
void CachedQuery::Flush() {
|
||||||
|
const u64 value = counter->Query();
|
||||||
|
std::memcpy(host_ptr, &value, sizeof(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
void HostCounter::Disable() {
|
void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_) {
|
||||||
if (!is_beginned) {
|
counter = std::move(counter_);
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
glEndQuery(GL_SAMPLES_PASSED);
|
QueryType CachedQuery::GetType() const {
|
||||||
is_beginned = false;
|
return type;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -4,38 +4,131 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
#include <optional>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
#include <glad/glad.h>
|
#include <glad/glad.h>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/rasterizer_cache.h"
|
||||||
|
#include "video_core/rasterizer_interface.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
|
||||||
|
namespace Core {
|
||||||
|
class System;
|
||||||
|
}
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class CachedQuery;
|
||||||
|
class HostCounter;
|
||||||
|
class RasterizerOpenGL;
|
||||||
|
class QueryCache;
|
||||||
|
|
||||||
|
class CounterStream final {
|
||||||
|
public:
|
||||||
|
explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
|
||||||
|
~CounterStream();
|
||||||
|
|
||||||
|
void Update(bool enabled, bool any_command_queued);
|
||||||
|
|
||||||
|
void Reset(bool any_command_queued);
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
|
||||||
|
|
||||||
|
bool IsEnabled() const {
|
||||||
|
return current != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void EndQuery(bool any_command_queued);
|
||||||
|
|
||||||
|
QueryCache& cache;
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> current;
|
||||||
|
std::shared_ptr<HostCounter> last;
|
||||||
|
VideoCore::QueryType type;
|
||||||
|
GLenum target;
|
||||||
|
};
|
||||||
|
|
||||||
|
class QueryCache final : public RasterizerCache<std::shared_ptr<CachedQuery>> {
|
||||||
|
public:
|
||||||
|
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
|
||||||
|
~QueryCache();
|
||||||
|
|
||||||
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type);
|
||||||
|
|
||||||
|
void UpdateCounters();
|
||||||
|
|
||||||
|
void ResetCounter(VideoCore::QueryType type);
|
||||||
|
|
||||||
|
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
|
||||||
|
VideoCore::QueryType type);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void FlushObjectInner(const std::shared_ptr<CachedQuery>& counter) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
CounterStream& GetStream(VideoCore::QueryType type);
|
||||||
|
|
||||||
|
Core::System& system;
|
||||||
|
RasterizerOpenGL& rasterizer;
|
||||||
|
|
||||||
|
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||||
|
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
|
||||||
|
};
|
||||||
|
|
||||||
class HostCounter final {
|
class HostCounter final {
|
||||||
public:
|
public:
|
||||||
explicit HostCounter(GLenum target);
|
explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
|
||||||
|
VideoCore::QueryType type);
|
||||||
|
explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
|
||||||
|
VideoCore::QueryType type, OGLQuery&& query);
|
||||||
~HostCounter();
|
~HostCounter();
|
||||||
|
|
||||||
/// Enables or disables the counter as required.
|
|
||||||
void UpdateState(bool enabled);
|
|
||||||
|
|
||||||
/// Resets the counter disabling it if needed.
|
|
||||||
void Reset();
|
|
||||||
|
|
||||||
/// Returns the current value of the query.
|
/// Returns the current value of the query.
|
||||||
/// @note It may harm precision of future queries if the counter is not disabled.
|
|
||||||
u64 Query();
|
u64 Query();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Enables the counter when disabled.
|
QueryCache& cache;
|
||||||
void Enable();
|
VideoCore::QueryType type;
|
||||||
|
|
||||||
/// Disables the counter when enabled.
|
|
||||||
void Disable();
|
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
|
||||||
OGLQuery query; ///< OpenGL query.
|
OGLQuery query; ///< OpenGL query.
|
||||||
u64 counter{}; ///< Added values of the counter.
|
u64 result; ///< Added values of the counter.
|
||||||
bool is_beginned{}; ///< True when the OpenGL query is beginned.
|
};
|
||||||
|
|
||||||
|
class CachedQuery final : public RasterizerCacheObject {
|
||||||
|
public:
|
||||||
|
explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
|
||||||
|
~CachedQuery();
|
||||||
|
|
||||||
|
/// Writes the counter value to host memory.
|
||||||
|
void Flush();
|
||||||
|
|
||||||
|
/// Updates the counter this cached query registered in guest memory will write when requested.
|
||||||
|
void SetCounter(std::shared_ptr<HostCounter> counter);
|
||||||
|
|
||||||
|
/// Returns the query type.
|
||||||
|
VideoCore::QueryType GetType() const;
|
||||||
|
|
||||||
|
VAddr GetCpuAddr() const override {
|
||||||
|
return cpu_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::size_t GetSizeInBytes() const override {
|
||||||
|
return sizeof(u64);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
VideoCore::QueryType type;
|
||||||
|
VAddr cpu_addr; ///< Guest CPU address.
|
||||||
|
u8* host_ptr; ///< Writable host pointer.
|
||||||
|
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||||
|
@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
ScreenInfo& info)
|
ScreenInfo& info)
|
||||||
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
|
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
|
||||||
shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
|
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
|
||||||
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
|
screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
|
||||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||||
state.draw.shader_program = 0;
|
state.draw.shader_program = 0;
|
||||||
state.Apply();
|
state.Apply();
|
||||||
|
@ -548,9 +549,9 @@ void RasterizerOpenGL::Clear() {
|
||||||
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
|
||||||
const auto& regs = gpu.regs;
|
const auto& regs = gpu.regs;
|
||||||
samples_passed.UpdateState(regs.samplecnt_enable);
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
SyncRasterizeEnable(state);
|
SyncRasterizeEnable(state);
|
||||||
SyncColorMask();
|
SyncColorMask();
|
||||||
|
@ -718,24 +719,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
|
||||||
switch (type) {
|
query_cache.ResetCounter(type);
|
||||||
case VideoCore::QueryType::SamplesPassed:
|
|
||||||
samples_passed.Reset();
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 RasterizerOpenGL::Query(VideoCore::QueryType type) {
|
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type) {
|
||||||
switch (type) {
|
query_cache.Query(gpu_addr, type);
|
||||||
case VideoCore::QueryType::SamplesPassed:
|
|
||||||
return samples_passed.Query();
|
|
||||||
default:
|
|
||||||
UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAll() {}
|
void RasterizerOpenGL::FlushAll() {}
|
||||||
|
@ -747,6 +735,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
}
|
}
|
||||||
texture_cache.FlushRegion(addr, size);
|
texture_cache.FlushRegion(addr, size);
|
||||||
buffer_cache.FlushRegion(addr, size);
|
buffer_cache.FlushRegion(addr, size);
|
||||||
|
query_cache.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
@ -757,6 +746,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
texture_cache.InvalidateRegion(addr, size);
|
texture_cache.InvalidateRegion(addr, size);
|
||||||
shader_cache.InvalidateRegion(addr, size);
|
shader_cache.InvalidateRegion(addr, size);
|
||||||
buffer_cache.InvalidateRegion(addr, size);
|
buffer_cache.InvalidateRegion(addr, size);
|
||||||
|
query_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
|
|
@ -63,7 +63,7 @@ public:
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void DispatchCompute(GPUVAddr code_addr) override;
|
void DispatchCompute(GPUVAddr code_addr) override;
|
||||||
void ResetCounter(VideoCore::QueryType type) override;
|
void ResetCounter(VideoCore::QueryType type) override;
|
||||||
u64 Query(VideoCore::QueryType type) override;
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
|
@ -78,6 +78,11 @@ public:
|
||||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||||
|
|
||||||
|
/// Returns true when there are commands queued to the OpenGL server.
|
||||||
|
bool AnyCommandQueued() const {
|
||||||
|
return num_queued_commands > 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Configures the color and depth framebuffer states.
|
/// Configures the color and depth framebuffer states.
|
||||||
void ConfigureFramebuffers();
|
void ConfigureFramebuffers();
|
||||||
|
@ -207,6 +212,7 @@ private:
|
||||||
ShaderCacheOpenGL shader_cache;
|
ShaderCacheOpenGL shader_cache;
|
||||||
SamplerCacheOpenGL sampler_cache;
|
SamplerCacheOpenGL sampler_cache;
|
||||||
FramebufferCacheOpenGL framebuffer_cache;
|
FramebufferCacheOpenGL framebuffer_cache;
|
||||||
|
QueryCache query_cache;
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
ScreenInfo& screen_info;
|
ScreenInfo& screen_info;
|
||||||
|
@ -224,8 +230,6 @@ private:
|
||||||
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
||||||
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
||||||
|
|
||||||
HostCounter samples_passed{GL_SAMPLES_PASSED};
|
|
||||||
|
|
||||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||||
std::size_t num_queued_commands = 0;
|
std::size_t num_queued_commands = 0;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue