Maxwell3D: Implement State Dirty Flags.

This commit is contained in:
Fernando Sahmkow 2019-07-13 16:52:32 -04:00 committed by FernandoS27
parent 0d3db58657
commit a081dea8ab
6 changed files with 197 additions and 42 deletions

View File

@ -159,6 +159,88 @@ void Maxwell3D::InitDirtySettings() {
sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32); sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count, set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
DIRTY_REGS_POS(shaders)); DIRTY_REGS_POS(shaders));
// State
// Viewport
constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
set_block(viewport_start, viewport_size, viewport_dirty_reg);
constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
// Viewport transformation
constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
// Cullmode
constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
// Screen y control
dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
// Primitive Restart
constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
// Depth Test
constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
// Stencil Test
constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
// Color Mask
constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
color_mask_dirty_reg);
// Blend State
constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
blend_state_dirty_reg);
dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
blend_state_dirty_reg);
// Scissor State
constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
scissor_test_dirty_reg);
// Polygon Offset
constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
} }
void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) { void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {

View File

@ -1148,22 +1148,20 @@ public:
bool shaders; bool shaders;
// State // State
bool viewport; bool viewport;
bool clip_enabled;
bool clip_coefficient; bool clip_coefficient;
bool cull_mode; bool cull_mode;
bool primitive_restart; bool primitive_restart;
bool depth_test; bool depth_test;
bool stencil_test; bool stencil_test;
bool blend_state; bool blend_state;
bool logic_op;
bool fragment_color_clamp;
bool multi_sample;
bool scissor_test; bool scissor_test;
bool transform_feedback; bool transform_feedback;
bool point;
bool color_mask; bool color_mask;
bool polygon_offset; bool polygon_offset;
bool alpha_test;
// Complementary
bool viewport_transform;
bool screen_y_control;
bool memory_general; bool memory_general;
}; };

View File

@ -936,11 +936,11 @@ void RasterizerOpenGL::SyncClipCoef() {
} }
void RasterizerOpenGL::SyncCullMode() { void RasterizerOpenGL::SyncCullMode() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
state.cull.enabled = regs.cull.enabled != 0; state.cull.enabled = regs.cull.enabled != 0;
if (state.cull.enabled) {
state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
@ -956,36 +956,33 @@ void RasterizerOpenGL::SyncCullMode() {
else if (state.cull.front_face == GL_CW) else if (state.cull.front_face == GL_CW)
state.cull.front_face = GL_CCW; state.cull.front_face = GL_CCW;
} }
}
} }
void RasterizerOpenGL::SyncPrimitiveRestart() { void RasterizerOpenGL::SyncPrimitiveRestart() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
state.primitive_restart.enabled = regs.primitive_restart.enabled; state.primitive_restart.enabled = regs.primitive_restart.enabled;
state.primitive_restart.index = regs.primitive_restart.index; state.primitive_restart.index = regs.primitive_restart.index;
} }
void RasterizerOpenGL::SyncDepthTestState() { void RasterizerOpenGL::SyncDepthTestState() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
const auto& regs = maxwell3d.regs;
state.depth.test_enabled = regs.depth_test_enable != 0; state.depth.test_enabled = regs.depth_test_enable != 0;
state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
if (!state.depth.test_enabled)
return;
state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
} }
void RasterizerOpenGL::SyncStencilTestState() { void RasterizerOpenGL::SyncStencilTestState() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
state.stencil.test_enabled = regs.stencil_enable != 0; if (!maxwell3d.dirty.stencil_test) {
if (!regs.stencil_enable) {
return; return;
} }
const auto& regs = maxwell3d.regs;
state.stencil.test_enabled = regs.stencil_enable != 0;
state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
state.stencil.front.test_ref = regs.stencil_front_func_ref; state.stencil.front.test_ref = regs.stencil_front_func_ref;
state.stencil.front.test_mask = regs.stencil_front_func_mask; state.stencil.front.test_mask = regs.stencil_front_func_mask;
@ -1010,10 +1007,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
state.stencil.back.action_depth_fail = GL_KEEP; state.stencil.back.action_depth_fail = GL_KEEP;
state.stencil.back.action_depth_pass = GL_KEEP; state.stencil.back.action_depth_pass = GL_KEEP;
} }
state.MarkDirtyStencilState(true);
maxwell3d.dirty.stencil_test = false;
} }
void RasterizerOpenGL::SyncColorMask() { void RasterizerOpenGL::SyncColorMask() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.color_mask) {
return;
}
const auto& regs = maxwell3d.regs;
const std::size_t count = const std::size_t count =
regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
for (std::size_t i = 0; i < count; i++) { for (std::size_t i = 0; i < count; i++) {
@ -1024,6 +1028,9 @@ void RasterizerOpenGL::SyncColorMask() {
dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
} }
state.MarkDirtyColorMask(true);
maxwell3d.dirty.color_mask = false;
} }
void RasterizerOpenGL::SyncMultiSampleState() { void RasterizerOpenGL::SyncMultiSampleState() {
@ -1038,7 +1045,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
} }
void RasterizerOpenGL::SyncBlendState() { void RasterizerOpenGL::SyncBlendState() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.blend_state) {
return;
}
const auto& regs = maxwell3d.regs;
state.blend_color.red = regs.blend_color.r; state.blend_color.red = regs.blend_color.r;
state.blend_color.green = regs.blend_color.g; state.blend_color.green = regs.blend_color.g;
@ -1061,6 +1072,8 @@ void RasterizerOpenGL::SyncBlendState() {
for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
state.blend[i].enabled = false; state.blend[i].enabled = false;
} }
maxwell3d.dirty.blend_state = false;
state.MarkDirtyBlendState(true);
return; return;
} }
@ -1077,6 +1090,9 @@ void RasterizerOpenGL::SyncBlendState() {
blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
} }
state.MarkDirtyBlendState(true);
maxwell3d.dirty.blend_state = false;
} }
void RasterizerOpenGL::SyncLogicOpState() { void RasterizerOpenGL::SyncLogicOpState() {
@ -1128,13 +1144,21 @@ void RasterizerOpenGL::SyncPointState() {
} }
void RasterizerOpenGL::SyncPolygonOffset() { void RasterizerOpenGL::SyncPolygonOffset() {
const auto& regs = system.GPU().Maxwell3D().regs; auto& maxwell3d = system.GPU().Maxwell3D();
if (!maxwell3d.dirty.polygon_offset) {
return;
}
const auto& regs = maxwell3d.regs;
state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
state.polygon_offset.units = regs.polygon_offset_units; state.polygon_offset.units = regs.polygon_offset_units;
state.polygon_offset.factor = regs.polygon_offset_factor; state.polygon_offset.factor = regs.polygon_offset_factor;
state.polygon_offset.clamp = regs.polygon_offset_clamp; state.polygon_offset.clamp = regs.polygon_offset_clamp;
state.MarkDirtyPolygonOffset(true);
maxwell3d.dirty.polygon_offset = false;
} }
void RasterizerOpenGL::SyncAlphaTest() { void RasterizerOpenGL::SyncAlphaTest() {

View File

@ -526,7 +526,7 @@ void OpenGLState::ApplySamplers() const {
} }
} }
void OpenGLState::Apply() const { void OpenGLState::Apply() {
MICROPROFILE_SCOPE(OpenGL_State); MICROPROFILE_SCOPE(OpenGL_State);
ApplyFramebufferState(); ApplyFramebufferState();
ApplyVertexArrayState(); ApplyVertexArrayState();
@ -536,19 +536,31 @@ void OpenGLState::Apply() const {
ApplyPointSize(); ApplyPointSize();
ApplyFragmentColorClamp(); ApplyFragmentColorClamp();
ApplyMultisample(); ApplyMultisample();
ApplyDepthClamp(); if (dirty.color_mask) {
ApplyColorMask(); ApplyColorMask();
dirty.color_mask = false;
}
ApplyDepthClamp();
ApplyViewport(); ApplyViewport();
if (dirty.stencil_state) {
ApplyStencilTest(); ApplyStencilTest();
dirty.stencil_state = false;
}
ApplySRgb(); ApplySRgb();
ApplyCulling(); ApplyCulling();
ApplyDepth(); ApplyDepth();
ApplyPrimitiveRestart(); ApplyPrimitiveRestart();
if (dirty.blend_state) {
ApplyBlending(); ApplyBlending();
dirty.blend_state = false;
}
ApplyLogicOp(); ApplyLogicOp();
ApplyTextures(); ApplyTextures();
ApplySamplers(); ApplySamplers();
if (dirty.polygon_offset) {
ApplyPolygonOffset(); ApplyPolygonOffset();
dirty.polygon_offset = false;
}
ApplyAlphaTest(); ApplyAlphaTest();
} }

View File

@ -196,7 +196,7 @@ public:
} }
/// Apply this state as the current OpenGL state /// Apply this state as the current OpenGL state
void Apply() const; void Apply();
void ApplyFramebufferState() const; void ApplyFramebufferState() const;
void ApplyVertexArrayState() const; void ApplyVertexArrayState() const;
@ -237,11 +237,46 @@ public:
/// Viewport does not affect glClearBuffer so emulate viewport using scissor test /// Viewport does not affect glClearBuffer so emulate viewport using scissor test
void EmulateViewportWithScissor(); void EmulateViewportWithScissor();
/// Sets the blend-state dirty flag. Apply() only calls ApplyBlending() while this flag is set,
/// and clears it afterwards.
/// @param is_dirty true to request re-application on the next Apply(), false to suppress it.
void MarkDirtyBlendState(const bool is_dirty) {
dirty.blend_state = is_dirty;
}
/// Sets the stencil-state dirty flag. Apply() only calls ApplyStencilTest() while this flag is
/// set, and clears it afterwards.
/// @param is_dirty true to request re-application on the next Apply(), false to suppress it.
void MarkDirtyStencilState(const bool is_dirty) {
dirty.stencil_state = is_dirty;
}
/// Sets the viewport-state dirty flag.
/// NOTE(review): the Apply() visible alongside this change still calls ApplyViewport()
/// unconditionally, so this flag does not appear to be consumed yet - confirm.
/// @param is_dirty new value of the viewport dirty flag.
void MarkDirtyViewportState(const bool is_dirty) {
dirty.viewport_state = is_dirty;
}
/// Sets the polygon-offset dirty flag. Apply() only calls ApplyPolygonOffset() while this flag
/// is set, and clears it afterwards.
/// @param is_dirty true to request re-application on the next Apply(), false to suppress it.
void MarkDirtyPolygonOffset(const bool is_dirty) {
dirty.polygon_offset = is_dirty;
}
/// Sets the color-mask dirty flag. Apply() only calls ApplyColorMask() while this flag is set,
/// and clears it afterwards.
/// @param is_dirty true to request re-application on the next Apply(), false to suppress it.
void MarkDirtyColorMask(const bool is_dirty) {
dirty.color_mask = is_dirty;
}
/// Raises every dirty flag tracked by this state object, so that each dirty-gated step in
/// Apply() runs on the next call. Intended for states whose flags cannot be trusted, such as
/// a freshly constructed state or a saved copy that is about to be restored.
void AllDirty() {
    // All flags are plain bools, so one chained assignment leaves each of them set to true.
    dirty.blend_state = dirty.stencil_state = dirty.viewport_state = dirty.polygon_offset =
        dirty.color_mask = true;
}
private: private:
static OpenGLState cur_state; static OpenGLState cur_state;
// Workaround for sRGB problems caused by Qt not supporting sRGB output // Workaround for sRGB problems caused by Qt not supporting sRGB output
static bool s_rgb_used; static bool s_rgb_used;
/// Per-group dirty flags. Apply() runs the gated Apply* step only for a group whose flag is
/// set and clears that flag afterwards. The member is value-initialized, so every flag starts
/// out false until one of the MarkDirty* setters or AllDirty() raises it.
struct {
bool blend_state; ///< Gates ApplyBlending() in Apply()
bool stencil_state; ///< Gates ApplyStencilTest() in Apply()
bool viewport_state; ///< NOTE(review): not read by the Apply() visible in this change - confirm
bool polygon_offset; ///< Gates ApplyPolygonOffset() in Apply()
bool color_mask; ///< Gates ApplyColorMask() in Apply()
} dirty{};
}; };
} // namespace OpenGL } // namespace OpenGL

View File

@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const auto& dst_params{dst_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()};
OpenGLState prev_state{OpenGLState::GetCurState()}; OpenGLState prev_state{OpenGLState::GetCurState()};
SCOPE_EXIT({ prev_state.Apply(); }); SCOPE_EXIT({
prev_state.AllDirty();
prev_state.Apply();
});
OpenGLState state; OpenGLState state;
state.draw.read_framebuffer = src_framebuffer.handle; state.draw.read_framebuffer = src_framebuffer.handle;
state.draw.draw_framebuffer = dst_framebuffer.handle; state.draw.draw_framebuffer = dst_framebuffer.handle;
state.AllDirty();
state.Apply(); state.Apply();
u32 buffers{}; u32 buffers{};