GPU: Implemented more depth buffer formats.
This fixes the horizontal lines in Picross E, Cubic Ninja, Cave Story 3D and possibly others.
This commit is contained in:
parent
4b8d4d0ed5
commit
414b0741c4
5 changed files with 120 additions and 14 deletions
|
@ -81,9 +81,9 @@ inline void Write(u32 addr, const T data) {
|
||||||
if (config.fill_24bit) {
|
if (config.fill_24bit) {
|
||||||
// fill with 24-bit values
|
// fill with 24-bit values
|
||||||
for (u8* ptr = start; ptr < end; ptr += 3) {
|
for (u8* ptr = start; ptr < end; ptr += 3) {
|
||||||
ptr[0] = config.value_24bit_b;
|
ptr[0] = config.value_24bit_r;
|
||||||
ptr[1] = config.value_24bit_g;
|
ptr[1] = config.value_24bit_g;
|
||||||
ptr[2] = config.value_24bit_r;
|
ptr[2] = config.value_24bit_b;
|
||||||
}
|
}
|
||||||
} else if (config.fill_32bit) {
|
} else if (config.fill_32bit) {
|
||||||
// fill with 32-bit values
|
// fill with 32-bit values
|
||||||
|
|
|
@ -100,10 +100,10 @@ struct Regs {
|
||||||
// Set to 1 upon completion.
|
// Set to 1 upon completion.
|
||||||
BitField<0, 1, u32> finished;
|
BitField<0, 1, u32> finished;
|
||||||
|
|
||||||
// 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values
|
// If both of these bits are unset, then it will fill the memory with a 16 bit value
|
||||||
|
// 1: fill with 24-bit wide values
|
||||||
BitField<8, 1, u32> fill_24bit;
|
BitField<8, 1, u32> fill_24bit;
|
||||||
|
// 1: fill with 32-bit wide values
|
||||||
// 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
|
|
||||||
BitField<9, 1, u32> fill_32bit;
|
BitField<9, 1, u32> fill_32bit;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -100,6 +100,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
|
||||||
Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF) };
|
Convert4To8((pixel >> 4) & 0xF), Convert4To8(pixel & 0xF) };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a depth value stored in D16 format
|
||||||
|
* @param bytes Pointer to encoded source value
|
||||||
|
* @return Depth value as an u32
|
||||||
|
*/
|
||||||
|
inline const u32 DecodeD16(const u8* bytes) {
|
||||||
|
return *reinterpret_cast<const u16_le*>(bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a depth value stored in D24 format
|
||||||
|
* @param bytes Pointer to encoded source value
|
||||||
|
* @return Depth value as an u32
|
||||||
|
*/
|
||||||
|
inline const u32 DecodeD24(const u8* bytes) {
|
||||||
|
return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decode a depth value and a stencil value stored in D24S8 format
|
||||||
|
* @param bytes Pointer to encoded source values
|
||||||
|
* @return Resulting values stored as a Math::Vec2
|
||||||
|
*/
|
||||||
|
inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
|
||||||
|
return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encode a color as RGBA8 format
|
* Encode a color as RGBA8 format
|
||||||
* @param color Source color to encode
|
* @param color Source color to encode
|
||||||
|
@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
|
||||||
(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
|
(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode a depth value as D16 format
|
||||||
|
* @param value Source depth value to encode
|
||||||
|
* @param bytes Pointer where to store the encoded value
|
||||||
|
*/
|
||||||
|
inline void EncodeD16(u32 value, u8* bytes) {
|
||||||
|
*reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode a depth value as D24 format
|
||||||
|
* @param value Source depth value to encode
|
||||||
|
* @param bytes Pointer where to store the encoded value
|
||||||
|
*/
|
||||||
|
inline void EncodeD24(u32 value, u8* bytes) {
|
||||||
|
bytes[0] = value & 0xFF;
|
||||||
|
bytes[1] = (value >> 8) & 0xFF;
|
||||||
|
bytes[2] = (value >> 16) & 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encode depth and stencil values as D24S8 format
|
||||||
|
* @param depth Source depth values to encode
|
||||||
|
* @param stencil Source stencil value to encode
|
||||||
|
* @param bytes Pointer where to store the encoded value
|
||||||
|
*/
|
||||||
|
inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
|
||||||
|
*reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -418,6 +418,13 @@ struct Regs {
|
||||||
RGBA4 = 4,
|
RGBA4 = 4,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Depth buffer formats that the depth_format register value is matched against.
// The numbering is not contiguous: no enumerator is defined for value 1.
enum DepthFormat : u32 {
    D16   = 0, // 16-bit depth
    D24   = 2, // 24-bit depth
    D24S8 = 3, // 24-bit depth combined with an 8-bit stencil value
};
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x6);
|
INSERT_PADDING_WORDS(0x6);
|
||||||
|
|
||||||
u32 depth_format;
|
u32 depth_format;
|
||||||
|
|
|
@ -100,23 +100,65 @@ static u32 GetDepth(int x, int y) {
|
||||||
y = (registers.framebuffer.height - y);
|
y = (registers.framebuffer.height - y);
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
u32 stride = registers.framebuffer.width * 2;
|
|
||||||
|
|
||||||
// Assuming 16-bit depth buffer format until actual format handling is implemented
|
switch (registers.framebuffer.depth_format) {
|
||||||
return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
|
case registers.framebuffer.D16:
|
||||||
|
{
|
||||||
|
u32 stride = registers.framebuffer.width * 2;
|
||||||
|
return Color::DecodeD16(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
|
||||||
|
}
|
||||||
|
case registers.framebuffer.D24:
|
||||||
|
{
|
||||||
|
u32 stride = registers.framebuffer.width * 3;
|
||||||
|
u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
|
||||||
|
return Color::DecodeD24(address);
|
||||||
|
}
|
||||||
|
case registers.framebuffer.D24S8:
|
||||||
|
{
|
||||||
|
u32 stride = registers.framebuffer.width * 4;
|
||||||
|
return Color::DecodeD24S8(depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride).x;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void SetDepth(int x, int y, u16 value) {
|
static void SetDepth(int x, int y, u32 value) {
|
||||||
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
|
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
|
||||||
u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
|
u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
|
||||||
|
|
||||||
y = (registers.framebuffer.height - y);
|
y = (registers.framebuffer.height - y);
|
||||||
|
|
||||||
const u32 coarse_y = y & ~7;
|
const u32 coarse_y = y & ~7;
|
||||||
u32 stride = registers.framebuffer.width * 2;
|
|
||||||
|
|
||||||
// Assuming 16-bit depth buffer format until actual format handling is implemented
|
switch (registers.framebuffer.depth_format) {
|
||||||
*(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
|
case registers.framebuffer.D16:
|
||||||
|
{
|
||||||
|
u32 stride = registers.framebuffer.width * 2;
|
||||||
|
Color::EncodeD16(value, depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case registers.framebuffer.D24:
|
||||||
|
{
|
||||||
|
u32 stride = registers.framebuffer.width * 3;
|
||||||
|
u8* address = depth_buffer + VideoCore::GetMortonOffset(x, y, 3) + coarse_y * stride;
|
||||||
|
Color::EncodeD24(value, address);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case registers.framebuffer.D24S8:
|
||||||
|
{
|
||||||
|
u32 stride = registers.framebuffer.width * 4;
|
||||||
|
// TODO(Subv): Implement the stencil buffer
|
||||||
|
Color::EncodeD24S8(value, 0, depth_buffer + VideoCore::GetMortonOffset(x, y, 4) + coarse_y * stride);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
|
||||||
|
UNIMPLEMENTED();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
|
||||||
|
@ -595,7 +637,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
|
||||||
u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
|
u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
|
||||||
v1.screenpos[2].ToFloat32() * w1 +
|
v1.screenpos[2].ToFloat32() * w1 +
|
||||||
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
|
||||||
u16 ref_z = GetDepth(x >> 4, y >> 4);
|
u32 ref_z = GetDepth(x >> 4, y >> 4);
|
||||||
|
|
||||||
bool pass = false;
|
bool pass = false;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue