gl_rasterizer: Implement quads topology

This commit is contained in:
ReinUsesLisp 2018-10-02 14:47:26 -03:00
parent 393042c09c
commit 3e2380327a
8 changed files with 236 additions and 46 deletions

View File

@ -27,6 +27,8 @@ add_library(video_core STATIC
renderer_base.h renderer_base.h
renderer_opengl/gl_buffer_cache.cpp renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_primitive_assembler.cpp
renderer_opengl/gl_primitive_assembler.h
renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp renderer_opengl/gl_rasterizer_cache.cpp

View File

@ -744,6 +744,12 @@ public:
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) | return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) |
end_addr_low); end_addr_low);
} }
/// Returns the GPU address of the first index that will actually be consumed: the start of the
/// index buffer advanced by `first` indices of the current format size.
GPUVAddr IndexStart() const {
    const auto skipped_bytes =
        static_cast<size_t>(first) * static_cast<size_t>(FormatSizeInBytes());
    return StartAddress() + skipped_bytes;
}
} index_array; } index_array;
INSERT_PADDING_WORDS(0x7); INSERT_PADDING_WORDS(0x7);

View File

@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size
} }
AlignBuffer(alignment); AlignBuffer(alignment);
GLintptr uploaded_offset = buffer_offset; const GLintptr uploaded_offset = buffer_offset;
Memory::ReadBlock(*cpu_addr, buffer_ptr, size); Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
std::size_t alignment) { std::size_t alignment) {
AlignBuffer(alignment); AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size); std::memcpy(buffer_ptr, raw_pointer, size);
GLintptr uploaded_offset = buffer_offset; const GLintptr uploaded_offset = buffer_offset;
buffer_ptr += size; buffer_ptr += size;
buffer_offset += size; buffer_offset += size;
return uploaded_offset; return uploaded_offset;
} }
std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
AlignBuffer(alignment);
u8* const uploaded_ptr = buffer_ptr;
const GLintptr uploaded_offset = buffer_offset;
buffer_ptr += size;
buffer_offset += size;
return std::make_tuple(uploaded_ptr, uploaded_offset);
}
void OGLBufferCache::Map(std::size_t max_size) { void OGLBufferCache::Map(std::size_t max_size) {
bool invalidate; bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) = std::tie(buffer_ptr, buffer_offset_base, invalidate) =
@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) {
InvalidateAll(); InvalidateAll();
} }
} }
void OGLBufferCache::Unmap() { void OGLBufferCache::Unmap() {
stream_buffer.Unmap(buffer_offset - buffer_offset_base); stream_buffer.Unmap(buffer_offset - buffer_offset_base);
} }
@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const {
void OGLBufferCache::AlignBuffer(std::size_t alignment) { void OGLBufferCache::AlignBuffer(std::size_t alignment) {
// Align the offset, not the mapped pointer // Align the offset, not the mapped pointer
GLintptr offset_aligned = const GLintptr offset_aligned =
static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment));
buffer_ptr += offset_aligned - buffer_offset; buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned; buffer_offset = offset_aligned;

View File

@ -6,6 +6,7 @@
#include <cstddef> #include <cstddef>
#include <memory> #include <memory>
#include <tuple>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_cache.h"
@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer
public: public:
explicit OGLBufferCache(std::size_t size); explicit OGLBufferCache(std::size_t size);
/// Uploads data from a guest GPU address. Returns host's buffer offset where it's been
/// allocated.
GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool cache = true); bool cache = true);
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
/// Reserves memory to be used by host's CPU. Returns mapped address and offset.
std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
void Map(std::size_t max_size); void Map(std::size_t max_size);
void Unmap(); void Unmap();

View File

@ -0,0 +1,64 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include "common/assert.h"
#include "common/common_types.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
namespace OpenGL {
/// Number of indices emitted for every quad. Despite the name this is not a triangle count: it is
/// the length of QUAD_MAP (two triangles of three indices each).
constexpr u32 TRIANGLES_PER_QUAD = 6;
/// Corner order used to expand one quad (corners 0..3) into the triangles (0, 1, 2) and (0, 2, 3).
constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
/// Stores a reference to the buffer cache that the Make* methods reserve their index data from.
PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
/// Defaulted: the only member is a reference to the buffer cache.
PrimitiveAssembler::~PrimitiveAssembler() = default;
/// Computes how many bytes must be reserved to hold the triangle indices generated for `count`
/// quad vertices. The size always assumes GLuint-sized indices.
/// @param count Number of quad vertices (or quad indices); expected to be a multiple of 4.
std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
    ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");

    // Every group of four input vertices expands to TRIANGLES_PER_QUAD output indices.
    const u32 generated_indices = (count / 4) * TRIANGLES_PER_QUAD;
    return generated_indices * sizeof(GLuint);
}
/// Generates a u32 index buffer that draws a non-indexed run of quads as triangles.
/// @param first Index of the first vertex of the first quad.
/// @param count Number of quad vertices; every four consecutive vertices form one quad.
/// @returns Offset inside the buffer cache where the generated indices start.
GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
    const std::size_t size{CalculateQuadSize(count)};
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size);

    const u32 quad_count = count / 4;
    for (u32 quad = 0; quad < quad_count; ++quad) {
        const u32 quad_base = first + quad * 4;
        for (const u32 corner : QUAD_MAP) {
            const u32 index = quad_base + corner;
            // Copy bytewise: the reserved pointer is a raw u8* into the mapped buffer.
            std::memcpy(dst_pointer, &index, sizeof(index));
            dst_pointer += sizeof(index);
        }
    }
    return index_offset;
}
/// Builds a triangle index buffer from a guest index buffer that describes quads.
/// Guest indices are copied verbatim (keeping their original size) in QUAD_MAP order, so the
/// result can be drawn with the guest's own index format.
/// @param gpu_addr   GPU virtual address of the first quad index to read.
/// @param index_size Size in bytes of one guest index.
/// @param count      Number of guest indices; every four consecutive indices form one quad.
/// @returns Offset inside the buffer cache where the rearranged indices start.
GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size,
                                             u32 count) {
    const std::size_t map_size{CalculateQuadSize(count)};
    auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);

    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
    // Dereferencing an empty optional is undefined behaviour; fail loudly on an unmapped address
    // instead of reading from a garbage CPU address.
    ASSERT_MSG(static_cast<bool>(cpu_addr), "Quad index buffer points to unmapped GPU memory");
    const u8* source{Memory::GetPointer(*cpu_addr)};

    const u32 quad_count = count / 4;
    for (u32 primitive = 0; primitive < quad_count; ++primitive) {
        for (const u32 corner : QUAD_MAP) {
            // Position of the guest index to fetch, not the index value itself.
            const u32 index = primitive * 4 + corner;
            const u8* src_offset = source + (index * index_size);

            std::memcpy(dst_pointer, src_offset, index_size);
            dst_pointer += index_size;
        }
    }
    return index_offset;
}
} // namespace OpenGL

View File

@ -0,0 +1,33 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/memory_manager.h"
namespace OpenGL {
class OGLBufferCache;
/// Converts quad draws into triangle index buffers, since the renderer issues them as
/// GL_TRIANGLES. Generated indices are written into memory reserved from the buffer cache.
class PrimitiveAssembler {
public:
explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
~PrimitiveAssembler();
/// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
std::size_t CalculateQuadSize(u32 count) const;
/// Generates u32 triangle indices for `count` quad vertices starting at vertex `first`.
/// Returns the buffer cache offset where the indices were written.
GLintptr MakeQuadArray(u32 first, u32 count);
/// Reads `count` guest indices of `index_size` bytes each from `gpu_addr` and writes them
/// rearranged as triangles. Returns the buffer cache offset where the indices were written.
GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count);
private:
/// Buffer cache that the generated index data is reserved from.
OGLBufferCache& buffer_cache;
};
} // namespace OpenGL

View File

@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12
MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100));
/// Everything needed to issue one draw call once vertex, index and shader state are bound.
struct DrawParameters {
    GLenum primitive_mode;
    GLsizei count;
    GLint current_instance;
    bool use_indexed;

    /// First vertex to draw; only read by the non-indexed path.
    GLint vertex_first;

    /// Index type, base vertex and index buffer offset; only read by the indexed path.
    GLenum index_format;
    GLint base_vertex;
    GLintptr index_buffer_offset;

    /// Issues the OpenGL draw call matching these parameters. When current_instance is greater
    /// than zero, a single instance is drawn with current_instance as its base instance.
    void DispatchDraw() const {
        const bool has_base_instance = current_instance > 0;

        if (!use_indexed) {
            if (has_base_instance) {
                glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1,
                                                  current_instance);
                return;
            }
            glDrawArrays(primitive_mode, vertex_first, count);
            return;
        }

        // OpenGL takes the index buffer offset disguised as a pointer.
        const auto index_buffer_ptr = reinterpret_cast<const void*>(index_buffer_offset);
        if (has_base_instance) {
            glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format,
                                                          index_buffer_ptr, 1, base_vertex,
                                                          current_instance);
            return;
        }
        glDrawElementsBaseVertex(primitive_mode, count, index_format, index_buffer_ptr,
                                 base_vertex);
    }
};
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() {
} }
} }
/// Builds the parameters of the draw call described by the current Maxwell3D registers and
/// uploads or generates the index data that call needs through the buffer cache.
/// Quads are always emitted as an indexed GL_TRIANGLES draw whose index buffer is produced by the
/// primitive assembler; every other topology is translated directly.
/// @returns Parameters ready to be executed with DrawParameters::DispatchDraw.
DrawParameters RasterizerOpenGL::SetupDraw() {
    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
    const auto& regs = gpu.regs;
    const bool is_indexed = accelerate_draw == AccelDraw::Indexed;

    DrawParameters params{};
    params.current_instance = gpu.state.current_instance;

    if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
        MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);

        params.use_indexed = true;
        params.primitive_mode = GL_TRIANGLES;

        if (is_indexed) {
            params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
            // Each quad (4 indices) becomes two triangles (6 indices).
            params.count = (regs.index_array.count / 4) * 6;
            params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
                regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
                regs.index_array.count);
            params.base_vertex = static_cast<GLint>(regs.vb_element_base);
        } else {
            // MakeQuadArray always generates u32 indexes
            params.index_format = GL_UNSIGNED_INT;
            params.count = (regs.vertex_buffer.count / 4) * 6;
            // MakeQuadArray takes the number of quad *vertices*, not the expanded triangle index
            // count: passing params.count would over-reserve memory and emit indices past the
            // last vertex.
            params.index_buffer_offset = primitive_assembler.MakeQuadArray(
                regs.vertex_buffer.first, regs.vertex_buffer.count);
        }
        return params;
    }

    params.use_indexed = is_indexed;
    params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);

    if (is_indexed) {
        MICROPROFILE_SCOPE(OpenGL_Index);
        params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
        params.count = regs.index_array.count;
        params.index_buffer_offset =
            buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize());
        params.base_vertex = static_cast<GLint>(regs.vb_element_base);
    } else {
        params.count = regs.vertex_buffer.count;
        params.vertex_first = regs.vertex_buffer.first;
    }
    // Flowing off the end of a non-void function is undefined behaviour; hand the result back.
    return params;
}
void RasterizerOpenGL::SetupShaders() { void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader); MICROPROFILE_SCOPE(OpenGL_Shader);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
return size; return size;
} }
/// Returns the size in bytes of the guest index buffer for the current draw: the index count
/// multiplied by the size of one index in the configured format.
std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;

    const auto index_count = static_cast<std::size_t>(regs.index_array.count);
    const auto bytes_per_index = static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
    return index_count * bytes_per_index;
}
bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) {
accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays;
DrawArrays(); DrawArrays();
@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() {
// Draw the vertex batch // Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) *
static_cast<u64>(regs.index_array.FormatSizeInBytes())};
state.draw.vertex_buffer = buffer_cache.GetHandle(); state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply(); state.Apply();
std::size_t buffer_size = CalculateVertexArraysSize(); std::size_t buffer_size = CalculateVertexArraysSize();
if (is_indexed) { // Add space for index buffer (keeping in mind non-core primitives)
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size; switch (regs.draw.topology) {
case Maxwell::PrimitiveTopology::Quads:
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
break;
default:
if (is_indexed) {
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize();
}
break;
} }
// Uniform space for the 5 shader stages // Uniform space for the 5 shader stages
@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() {
buffer_cache.Map(buffer_size); buffer_cache.Map(buffer_size);
SetupVertexArrays(); SetupVertexArrays();
DrawParameters params = SetupDraw();
// If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0;
if (is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Index);
// Adjust the index buffer offset so it points to the first desired index.
auto index_start = regs.index_array.StartAddress();
index_start += static_cast<size_t>(regs.index_array.first) *
static_cast<size_t>(regs.index_array.FormatSizeInBytes());
index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size);
}
SetupShaders(); SetupShaders();
buffer_cache.Unmap(); buffer_cache.Unmap();
@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() {
shader_program_manager->ApplyTo(state); shader_program_manager->ApplyTo(state);
state.Apply(); state.Apply();
const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; // Execute draw call
if (is_indexed) { params.DispatchDraw();
const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)};
if (gpu.state.current_instance > 0) {
glDrawElementsInstancedBaseVertexBaseInstance(
primitive_mode, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex,
gpu.state.current_instance);
} else {
glDrawElementsBaseVertex(primitive_mode, regs.index_array.count,
MaxwellToGL::IndexFormat(regs.index_array.format),
reinterpret_cast<const void*>(index_buffer_offset),
base_vertex);
}
} else {
if (gpu.state.current_instance > 0) {
glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first,
regs.vertex_buffer.count, 1,
gpu.state.current_instance);
} else {
glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count);
}
}
// Disable scissor test // Disable scissor test
state.scissor.enabled = false; state.scissor.enabled = false;

View File

@ -23,6 +23,7 @@
#include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h"
@ -38,6 +39,7 @@ class EmuWindow;
namespace OpenGL { namespace OpenGL {
struct ScreenInfo; struct ScreenInfo;
struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerInterface { class RasterizerOpenGL : public VideoCore::RasterizerInterface {
public: public:
@ -192,12 +194,17 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache; OGLBufferCache buffer_cache;
OGLFramebuffer framebuffer; OGLFramebuffer framebuffer;
PrimitiveAssembler primitive_assembler{buffer_cache};
GLint uniform_buffer_alignment; GLint uniform_buffer_alignment;
std::size_t CalculateVertexArraysSize() const; std::size_t CalculateVertexArraysSize() const;
std::size_t CalculateIndexBufferSize() const;
void SetupVertexArrays(); void SetupVertexArrays();
DrawParameters SetupDraw();
void SetupShaders(); void SetupShaders();
enum class AccelDraw { Disabled, Arrays, Indexed }; enum class AccelDraw { Disabled, Arrays, Indexed };