diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index dd41c3880..a9e9de652 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -10,6 +10,7 @@ #include #include +#include "video_core/color.h" #include "video_core/pica.h" #include "graphics_framebuffer.hxx" @@ -202,7 +203,8 @@ void GraphicsFramebufferWidget::OnUpdate() framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); framebuffer_height = framebuffer.GetHeight(); - framebuffer_format = static_cast(framebuffer.color_format); + // TODO: It's unknown how this format is actually specified + framebuffer_format = Format::RGBA8; break; } @@ -258,10 +260,10 @@ void GraphicsFramebufferWidget::OnUpdate() for (unsigned y = 0; y < framebuffer_height; ++y) { for (unsigned x = 0; x < framebuffer_width; ++x) { u16 value = *(u16*)(((u8*)color_buffer) + x * 2 + y * framebuffer_width * 2); - u8 r = (value >> 11) & 0x1F; - u8 g = (value >> 6) & 0x1F; - u8 b = (value >> 1) & 0x1F; - u8 a = value & 1; + u8 r = Color::Convert5To8((value >> 11) & 0x1F); + u8 g = Color::Convert5To8((value >> 6) & 0x1F); + u8 b = Color::Convert5To8((value >> 1) & 0x1F); + u8 a = Color::Convert1To8(value & 1); decoded_image.setPixel(x, y, qRgba(r, g, b, 255/*a*/)); } diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index dd619cb16..0ff6c6cde 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -94,11 +94,15 @@ inline void Write(u32 addr, const T data) { int r, g, b, a; } source_color = { 0, 0, 0, 0 }; + // Cheap emulation of horizontal scaling: Just skip each second pixel of the + // input framebuffer. We keep track of this in the pixel_skip variable. + unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; + switch (config.input_format) { case Regs::PixelFormat::RGBA8: { // TODO: Most likely got the component order messed up. - u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4; + u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip; source_color.r = srcptr[0]; // blue source_color.g = srcptr[1]; // green source_color.b = srcptr[2]; // red diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 292f496c1..7de055232 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -157,6 +157,9 @@ struct Regs { BitField< 8, 3, PixelFormat> input_format; BitField<12, 3, PixelFormat> output_format; BitField<16, 1, u32> output_tiled; // stores output in a tiled format + + // TODO: Not really sure if this actually scales, or even resizes at all. + BitField<24, 1, u32> scale_horizontally; }; INSERT_PADDING_WORDS(0x1); diff --git a/src/video_core/color.h b/src/video_core/color.h new file mode 100644 index 000000000..e86ac1265 --- /dev/null +++ b/src/video_core/color.h @@ -0,0 +1,32 @@ +// Copyright 2014 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "common/common_types.h" + +namespace Color { + +/// Convert a 1-bit color component to 8 bit +static inline u8 Convert1To8(u8 value) { + return value * 255; +} + +/// Convert a 4-bit color component to 8 bit +static inline u8 Convert4To8(u8 value) { + return (value << 4) | value; +} + +/// Convert a 5-bit color component to 8 bit +static inline u8 Convert5To8(u8 value) { + return (value << 3) | (value >> 2); +} + +/// Convert a 6-bit color component to 8 bit +static inline u8 Convert6To8(u8 value) { + return (value << 2) | (value >> 4); +} + + +} // namespace diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 9602779f4..0d9f4ba66 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -112,6 +112,11 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Initialize data for the current vertex VertexShader::InputVertex input; + // Load a debugging token to check whether this gets loaded by the running + // application or not. + static const float24 debug_token = float24::FromRawFloat24(0x00abcdef); + input.attr[0].w = debug_token; + for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { const u8* srcdata = Memory::GetPointer(PAddrToVAddr(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i])); @@ -136,6 +141,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } + // HACK: Some games do not initialize the vertex position's w component. This leads + // to critical issues since it messes up perspective division. As a + // workaround, we force the fourth component to 1.0 if we find this to be the + // case. + // To do this, we additionally have to assume that the first input attribute + // is the vertex position, since there's no information about this other than + // the empiric observation that this is usually the case. + if (input.attr[0].w == debug_token) + input.attr[0].w = float24::FromFloat32(1.0); + if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); @@ -173,6 +188,19 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): + case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): + { + int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); + auto values = registers.vs_int_uniforms[index]; + VertexShader::GetIntUniform(index) = Math::Vec4(values.x, values.y, values.z, values.w); + LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", + index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); + break; + } + case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 5921185a6..a494465b9 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -19,6 +19,7 @@ #include "common/log.h" #include "common/file_util.h" +#include "video_core/color.h" #include "video_core/math.h" #include "video_core/pica.h" @@ -359,29 +360,26 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture u8 g = ((source_ptr) >> 6) & 0x1F; u8 b = (source_ptr >> 1) & 0x1F; u8 a = source_ptr & 1; - return Math::MakeVec((r << 3) | (r >> 2), (g << 3) | (g >> 2), (b << 3) | (b >> 2), disable_alpha ? 255 : (a * 255)); + return Math::MakeVec(Color::Convert5To8(r), Color::Convert5To8(g), + Color::Convert5To8(b), disable_alpha ? 255 : Color::Convert1To8(a)); } case Regs::TextureFormat::RGB565: { const u16 source_ptr = *(const u16*)(source + offset * 2); - u8 r = (source_ptr >> 11) & 0x1F; - u8 g = ((source_ptr) >> 5) & 0x3F; - u8 b = (source_ptr) & 0x1F; - return Math::MakeVec((r << 3) | (r >> 2), (g << 2) | (g >> 4), (b << 3) | (b >> 2), 255); + u8 r = Color::Convert5To8((source_ptr >> 11) & 0x1F); + u8 g = Color::Convert6To8(((source_ptr) >> 5) & 0x3F); + u8 b = Color::Convert5To8((source_ptr) & 0x1F); + return Math::MakeVec(r, g, b, 255); } case Regs::TextureFormat::RGBA4: { const u8* source_ptr = source + offset * 2; - u8 r = source_ptr[1] >> 4; - u8 g = source_ptr[1] & 0xFF; - u8 b = source_ptr[0] >> 4; - u8 a = source_ptr[0] & 0xFF; - r = (r << 4) | r; - g = (g << 4) | g; - b = (b << 4) | b; - a = (a << 4) | a; + u8 r = Color::Convert4To8(source_ptr[1] >> 4); + u8 g = Color::Convert4To8(source_ptr[1] & 0xF); + u8 b = Color::Convert4To8(source_ptr[0] >> 4); + u8 a = Color::Convert4To8(source_ptr[0] & 0xF); return { r, g, b, disable_alpha ? (u8)255 : a }; } @@ -389,13 +387,11 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + offset * 2; - // TODO: component order not verified - if (disable_alpha) { // Show intensity as red, alpha as green - return { source_ptr[0], source_ptr[1], 0, 255 }; + return { source_ptr[1], source_ptr[0], 0, 255 }; } else { - return { source_ptr[0], source_ptr[0], source_ptr[0], source_ptr[1]}; + return { source_ptr[1], source_ptr[1], source_ptr[1], source_ptr[0]}; } } @@ -418,14 +414,10 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture case Regs::TextureFormat::IA4: { - const u8* source_ptr = source + offset / 2; + const u8* source_ptr = source + offset; - // TODO: component order not verified - - u8 i = (*source_ptr) & 0xF; - u8 a = ((*source_ptr) & 0xF0) >> 4; - a |= a << 4; - i |= i << 4; + u8 i = Color::Convert4To8(((*source_ptr) & 0xF0) >> 4); + u8 a = Color::Convert4To8((*source_ptr) & 0xF); if (disable_alpha) { // Show intensity as red, alpha as green @@ -439,15 +431,13 @@ const Math::Vec4 LookupTexture(const u8* source, int x, int y, const Texture { const u8* source_ptr = source + offset / 2; - // TODO: component order not verified - u8 a = (coarse_x % 2) ? ((*source_ptr)&0xF) : (((*source_ptr) & 0xF0) >> 4); - a |= a << 4; + a = Color::Convert4To8(a); if (disable_alpha) { - return { *source_ptr, *source_ptr, *source_ptr, 255 }; + return { a, a, a, 255 }; } else { - return { 0, 0, 0, *source_ptr }; + return { 0, 0, 0, a }; } } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 38bac748c..f5771ed84 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -50,7 +50,19 @@ struct Regs { u32 trigger_irq; - INSERT_PADDING_WORDS(0x30); + INSERT_PADDING_WORDS(0x2f); + + enum class CullMode : u32 { + // Select which polygons are considered to be "frontfacing". + KeepAll = 0, + KeepClockWise = 1, + KeepCounterClockWise = 2, + // TODO: What does the third value imply? + }; + + union { + BitField<0, 2, CullMode> cull_mode; + }; BitField<0, 24, u32> viewport_size_x; @@ -289,7 +301,7 @@ struct Regs { TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage5; - INSERT_PADDING_WORDS(0x13); + INSERT_PADDING_WORDS(0x3); const std::array GetTevStages() const { return { tev_stage0, tev_stage1, @@ -297,6 +309,60 @@ struct Regs { tev_stage4, tev_stage5 }; }; + struct { + enum DepthFunc : u32 { + Always = 1, + LessThan = 4, + GreaterThan = 6, + }; + + union { + // If false, logic blending is used + BitField<8, 1, u32> alphablend_enable; + }; + + union { + enum BlendEquation : u32 { + Add = 0, + }; + + enum BlendFactor : u32 { + Zero = 0, + One = 1, + + SourceAlpha = 6, + OneMinusSourceAlpha = 7, + }; + + BitField< 0, 8, BlendEquation> blend_equation_rgb; + BitField< 8, 8, BlendEquation> blend_equation_a; + + BitField<16, 4, BlendFactor> factor_source_rgb; + BitField<20, 4, BlendFactor> factor_dest_rgb; + + BitField<24, 4, BlendFactor> factor_source_a; + BitField<28, 4, BlendFactor> factor_dest_a; + } alpha_blending; + + union { + enum Op { + Set = 4, + }; + + BitField<0, 4, Op> op; + } logic_op; + + INSERT_PADDING_WORDS(0x4); + + union { + BitField< 0, 1, u32> depth_test_enable; + BitField< 4, 3, DepthFunc> depth_test_func; + BitField<12, 1, u32> depth_write_enable; + }; + + INSERT_PADDING_WORDS(0x8); + } output_merger; + struct { enum ColorFormat : u32 { RGBA8 = 0, @@ -495,8 +561,14 @@ struct Regs { INSERT_PADDING_WORDS(0x51); BitField<0, 16, u32> vs_bool_uniforms; + union { + BitField< 0, 8, u32> x; + BitField< 8, 8, u32> y; + BitField<16, 8, u32> z; + BitField<24, 8, u32> w; + } vs_int_uniforms[4]; - INSERT_PADDING_WORDS(0x9); + INSERT_PADDING_WORDS(0x5); // Offset to shader program entry point (in words) BitField<0, 16, u32> vs_main_offset; @@ -599,6 +671,7 @@ struct Regs { } while(false) ADD_FIELD(trigger_irq); + ADD_FIELD(cull_mode); ADD_FIELD(viewport_size_x); ADD_FIELD(viewport_size_y); ADD_FIELD(viewport_depth_range); @@ -617,6 +690,7 @@ struct Regs { ADD_FIELD(tev_stage3); ADD_FIELD(tev_stage4); ADD_FIELD(tev_stage5); + ADD_FIELD(output_merger); ADD_FIELD(framebuffer); ADD_FIELD(vertex_attributes); ADD_FIELD(index_array); @@ -625,6 +699,7 @@ struct Regs { ADD_FIELD(trigger_draw_indexed); ADD_FIELD(triangle_topology); ADD_FIELD(vs_bool_uniforms); + ADD_FIELD(vs_int_uniforms); ADD_FIELD(vs_main_offset); ADD_FIELD(vs_input_register_map); ADD_FIELD(vs_uniform_setup); @@ -668,6 +743,7 @@ private: #define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position") ASSERT_REG_POSITION(trigger_irq, 0x10); +ASSERT_REG_POSITION(cull_mode, 0x40); ASSERT_REG_POSITION(viewport_size_x, 0x41); ASSERT_REG_POSITION(viewport_size_y, 0x43); ASSERT_REG_POSITION(viewport_depth_range, 0x4d); @@ -688,6 +764,7 @@ ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage3, 0xd8); ASSERT_REG_POSITION(tev_stage4, 0xf0); ASSERT_REG_POSITION(tev_stage5, 0xf8); +ASSERT_REG_POSITION(output_merger, 0x100); ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); @@ -696,6 +773,7 @@ ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(triangle_topology, 0x25e); ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); +ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); ASSERT_REG_POSITION(vs_main_offset, 0x2ba); ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index a80148872..025d4e484 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -18,51 +18,82 @@ namespace Pica { namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4& color) { - u32* color_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetColorBufferPhysicalAddress()))); + const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); + u32* color_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(addr))); u32 value = (color.a() << 24) | (color.r() << 16) | (color.g() << 8) | color.b(); // Assuming RGBA8 format until actual framebuffer format handling is implemented *(color_buffer + x + y * registers.framebuffer.GetWidth()) = value; } +static const Math::Vec4 GetPixel(int x, int y) { + const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); + u32* color_buffer_u32 = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(addr))); + + u32 value = *(color_buffer_u32 + x + y * registers.framebuffer.GetWidth()); + Math::Vec4 ret; + ret.a() = value >> 24; + ret.r() = (value >> 16) & 0xFF; + ret.g() = (value >> 8) & 0xFF; + ret.b() = value & 0xFF; + return ret; + } + static u32 GetDepth(int x, int y) { - u16* depth_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); + const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); + u16* depth_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(addr))); // Assuming 16-bit depth buffer format until actual format handling is implemented return *(depth_buffer + x + y * registers.framebuffer.GetWidth()); } static void SetDepth(int x, int y, u16 value) { - u16* depth_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(registers.framebuffer.GetDepthBufferPhysicalAddress()))); + const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); + u16* depth_buffer = reinterpret_cast(Memory::GetPointer(PAddrToVAddr(addr))); // Assuming 16-bit depth buffer format until actual format handling is implemented *(depth_buffer + x + y * registers.framebuffer.GetWidth()) = value; } +// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values +struct Fix12P4 { + Fix12P4() {} + Fix12P4(u16 val) : val(val) {} + + static u16 FracMask() { return 0xF; } + static u16 IntMask() { return (u16)~0xF; } + + operator u16() const { + return val; + } + + bool operator < (const Fix12P4& oth) const { + return (u16)*this < (u16)oth; + } + +private: + u16 val; +}; + +/** + * Calculate signed area of the triangle spanned by the three argument vertices. + * The sign denotes an orientation. + * + * @todo define orientation concretely. + */ +static int SignedArea (const Math::Vec2& vtx1, + const Math::Vec2& vtx2, + const Math::Vec2& vtx3) { + const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); + const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); + // TODO: There is a very small chance this will overflow for sizeof(int) == 4 + return Math::Cross(vec1, vec2).z; +}; + void ProcessTriangle(const VertexShader::OutputVertex& v0, const VertexShader::OutputVertex& v1, const VertexShader::OutputVertex& v2) { - // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values - struct Fix12P4 { - Fix12P4() {} - Fix12P4(u16 val) : val(val) {} - - static u16 FracMask() { return 0xF; } - static u16 IntMask() { return (u16)~0xF; } - - operator u16() const { - return val; - } - - bool operator < (const Fix12P4& oth) const { - return (u16)*this < (u16)oth; - } - - private: - u16 val; - }; - // vertex positions in rasterizer coordinates auto FloatToFix = [](float24 flt) { return Fix12P4(static_cast(flt.ToFloat32() * 16.0f)); @@ -70,10 +101,23 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, auto ScreenToRasterizerCoordinates = [FloatToFix](const Math::Vec3 vec) { return Math::Vec3{FloatToFix(vec.x), FloatToFix(vec.y), FloatToFix(vec.z)}; }; + Math::Vec3 vtxpos[3]{ ScreenToRasterizerCoordinates(v0.screenpos), ScreenToRasterizerCoordinates(v1.screenpos), ScreenToRasterizerCoordinates(v2.screenpos) }; + if (registers.cull_mode == Regs::CullMode::KeepClockWise) { + // Reverse vertex order and use the CCW code path. + std::swap(vtxpos[1], vtxpos[2]); + } + + if (registers.cull_mode != Regs::CullMode::KeepAll) { + // Cull away triangles which are wound clockwise. + // TODO: A check for degenerate triangles ("== 0") should be considered for CullMode::KeepAll + if (SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) + return; + } + // TODO: Proper scissor rect test! u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); @@ -116,18 +160,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, for (u16 x = min_x; x < max_x; x += 0x10) { // Calculate the barycentric coordinates w0, w1 and w2 - auto orient2d = [](const Math::Vec2& vtx1, - const Math::Vec2& vtx2, - const Math::Vec2& vtx3) { - const auto vec1 = Math::MakeVec(vtx2 - vtx1, 0); - const auto vec2 = Math::MakeVec(vtx3 - vtx1, 0); - // TODO: There is a very small chance this will overflow for sizeof(int) == 4 - return Math::Cross(vec1, vec2).z; - }; - - int w0 = bias0 + orient2d(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); - int w1 = bias1 + orient2d(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); - int w2 = bias2 + orient2d(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); + int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); + int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); + int w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); int wsum = w0 + w1 + w2; // If current pixel is not covered by the current primitive @@ -201,8 +236,8 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, return 0; } }; - s = GetWrappedTexCoord(registers.texture0.wrap_s, s, registers.texture0.width); - t = GetWrappedTexCoord(registers.texture0.wrap_t, t, registers.texture0.height); + s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); + t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); u8* texture_data = Memory::GetPointer(PAddrToVAddr(texture.config.GetPhysicalAddress())); auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); @@ -279,12 +314,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto GetColorModifier = [](ColorModifier factor, const Math::Vec4& values) -> Math::Vec3 { + static auto GetColorModifier = [](ColorModifier factor, const Math::Vec4& values) -> Math::Vec3 { switch (factor) { case ColorModifier::SourceColor: return values.rgb(); + case ColorModifier::OneMinusSourceColor: + return (Math::Vec3(255, 255, 255) - values.rgb()).Cast(); + case ColorModifier::SourceAlpha: return { values.a(), values.a(), values.a() }; @@ -295,7 +333,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { + static auto GetAlphaModifier = [](AlphaModifier factor, u8 value) -> u8 { switch (factor) { case AlphaModifier::SourceAlpha: return value; @@ -310,7 +348,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto ColorCombine = [](Operation op, const Math::Vec3 input[3]) -> Math::Vec3 { + static auto ColorCombine = [](Operation op, const Math::Vec3 input[3]) -> Math::Vec3 { switch (op) { case Operation::Replace: return input[0]; @@ -330,6 +368,15 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Lerp: return ((input[0] * input[2] + input[1] * (Math::MakeVec(255, 255, 255) - input[2]).Cast()) / 255).Cast(); + case Operation::Subtract: + { + auto result = input[0].Cast() - input[1].Cast(); + result.r() = std::max(0, result.r()); + result.g() = std::max(0, result.g()); + result.b() = std::max(0, result.b()); + return result.Cast(); + } + default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); _dbg_assert_(HW_GPU, 0); @@ -337,7 +384,7 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, } }; - auto AlphaCombine = [](Operation op, const std::array& input) -> u8 { + static auto AlphaCombine = [](Operation op, const std::array& input) -> u8 { switch (op) { case Operation::Replace: return input[0]; @@ -351,6 +398,9 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, case Operation::Lerp: return (input[0] * input[2] + input[1] * (255 - input[2])) / 255; + case Operation::Subtract: + return std::max(0, (int)input[0] - (int)input[1]); + default: LOG_ERROR(HW_GPU, "Unknown alpha combiner operation %d\n", (int)op); _dbg_assert_(HW_GPU, 0); @@ -381,12 +431,111 @@ void ProcessTriangle(const VertexShader::OutputVertex& v0, combiner_output = Math::MakeVec(color_output, alpha_output); } - // TODO: Not sure if the multiplication by 65535 has already been taken care - // of when transforming to screen coordinates or not. - u16 z = (u16)(((float)v0.screenpos[2].ToFloat32() * w0 + - (float)v1.screenpos[2].ToFloat32() * w1 + - (float)v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); - SetDepth(x >> 4, y >> 4, z); + // TODO: Does depth indeed only get written even if depth testing is enabled? + if (registers.output_merger.depth_test_enable) { + u16 z = (u16)(-(v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum); + u16 ref_z = GetDepth(x >> 4, y >> 4); + + bool pass = false; + + switch (registers.output_merger.depth_test_func) { + case registers.output_merger.Always: + pass = true; + break; + + case registers.output_merger.LessThan: + pass = z < ref_z; + break; + + case registers.output_merger.GreaterThan: + pass = z > ref_z; + break; + + default: + LOG_ERROR(HW_GPU, "Unknown depth test function %x", registers.output_merger.depth_test_func.Value()); + break; + } + + if (!pass) + continue; + + if (registers.output_merger.depth_write_enable) + SetDepth(x >> 4, y >> 4, z); + } + + auto dest = GetPixel(x >> 4, y >> 4); + + if (registers.output_merger.alphablend_enable) { + auto params = registers.output_merger.alpha_blending; + + auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3 { + switch(factor) { + case params.Zero: + return Math::Vec3(0, 0, 0); + + case params.One: + return Math::Vec3(255, 255, 255); + + case params.SourceAlpha: + return Math::MakeVec(combiner_output.a(), combiner_output.a(), combiner_output.a()); + + case params.OneMinusSourceAlpha: + return Math::Vec3(255-combiner_output.a(), 255-combiner_output.a(), 255-combiner_output.a()); + + default: + LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); + exit(0); + break; + } + }; + + auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { + switch(factor) { + case params.Zero: + return 0; + + case params.One: + return 255; + + case params.SourceAlpha: + return combiner_output.a(); + + case params.OneMinusSourceAlpha: + return 255 - combiner_output.a(); + + default: + LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); + exit(0); + break; + } + }; + + auto srcfactor = Math::MakeVec(LookupFactorRGB(params.factor_source_rgb), + LookupFactorA(params.factor_source_a)); + auto dstfactor = Math::MakeVec(LookupFactorRGB(params.factor_dest_rgb), + LookupFactorA(params.factor_dest_a)); + + switch (params.blend_equation_rgb) { + case params.Add: + { + auto result = (combiner_output * srcfactor + dest * dstfactor) / 255; + result.r() = std::min(255, result.r()); + result.g() = std::min(255, result.g()); + result.b() = std::min(255, result.b()); + combiner_output = result.Cast(); + break; + } + + default: + LOG_CRITICAL(HW_GPU, "Unknown RGB blend equation %x", params.blend_equation_rgb.Value()); + exit(0); + } + } else { + LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); + exit(0); + } DrawPixel(x >> 4, y >> 4, combiner_output); } diff --git a/src/video_core/utils.h b/src/video_core/utils.h index 63ebccbde..6fd640425 100644 --- a/src/video_core/utils.h +++ b/src/video_core/utils.h @@ -8,32 +8,6 @@ #include "common/common_types.h" -namespace FormatPrecision { - -/// Adjust RGBA8 color with RGBA6 precision -static inline u32 rgba8_with_rgba6(u32 src) { - u32 color = src; - color &= 0xFCFCFCFC; - color |= (color >> 6) & 0x03030303; - return color; -} - -/// Adjust RGBA8 color with RGB565 precision -static inline u32 rgba8_with_rgb565(u32 src) { - u32 color = (src & 0xF8FCF8); - color |= (color >> 5) & 0x070007; - color |= (color >> 6) & 0x000300; - color |= 0xFF000000; - return color; -} - -/// Adjust Z24 depth value with Z16 precision -static inline u32 z24_with_z16(u32 src) { - return (src & 0xFFFF00) | (src >> 16); -} - -} // namespace - namespace VideoCore { /// Structure for the TGA texture format (for dumping) diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index bed5081a0..ff825e2e1 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -30,6 +30,8 @@ static struct { Math::Vec4 f[96]; std::array b; + + std::array,4> i; } shader_uniforms; // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! @@ -37,33 +39,31 @@ static struct { static std::array shader_memory; static std::array swizzle_data; -void SubmitShaderMemoryChange(u32 addr, u32 value) -{ +void SubmitShaderMemoryChange(u32 addr, u32 value) { shader_memory[addr] = value; } -void SubmitSwizzleDataChange(u32 addr, u32 value) -{ +void SubmitSwizzleDataChange(u32 addr, u32 value) { swizzle_data[addr] = value; } -Math::Vec4& GetFloatUniform(u32 index) -{ +Math::Vec4& GetFloatUniform(u32 index) { return shader_uniforms.f[index]; } -bool& GetBoolUniform(u32 index) -{ +bool& GetBoolUniform(u32 index) { return shader_uniforms.b[index]; } -const std::array& GetShaderBinary() -{ +Math::Vec4& GetIntUniform(u32 index) { + return shader_uniforms.i[index]; +} + +const std::array& GetShaderBinary() { return shader_memory; } -const std::array& GetSwizzlePatterns() -{ +const std::array& GetSwizzlePatterns() { return swizzle_data; } @@ -437,8 +437,7 @@ static void ProcessShaderCode(VertexShaderState& state) { } } -OutputVertex RunShader(const InputVertex& input, int num_attributes) -{ +OutputVertex RunShader(const InputVertex& input, int num_attributes) { VertexShaderState state; const u32* main = &shader_memory[registers.vs_main_offset]; diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index af3fb2a2f..3a68a3409 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -73,6 +73,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes); Math::Vec4& GetFloatUniform(u32 index); bool& GetBoolUniform(u32 index); +Math::Vec4& GetIntUniform(u32 index); const std::array& GetShaderBinary(); const std::array& GetSwizzlePatterns();