shader: Initial OpenGL implementation

This commit is contained in:
ReinUsesLisp 2021-05-23 04:28:34 -03:00 committed by ameerj
parent 850b08a16c
commit d621e96d0d
38 changed files with 1427 additions and 705 deletions

View file

@ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() {
return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)};
}
Value IREmitter::LocalInvocationId() {
return Inst(Opcode::LocalInvocationId);
}
U32 IREmitter::LocalInvocationIdX() {
return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)};
}

View file

@ -95,6 +95,7 @@ public:
[[nodiscard]] U32 WorkgroupIdY();
[[nodiscard]] U32 WorkgroupIdZ();
[[nodiscard]] Value LocalInvocationId();
[[nodiscard]] U32 LocalInvocationIdX();
[[nodiscard]] U32 LocalInvocationIdY();
[[nodiscard]] U32 LocalInvocationIdZ();

View file

@ -120,6 +120,13 @@ enum class SpecialRegister : u64 {
case SpecialRegister::SR_INVOCATION_INFO:
// LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed");
return ir.Imm32(0x00ff'0000);
case SpecialRegister::SR_TID: {
const IR::Value tid{ir.LocalInvocationId()};
return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)},
IR::U32{ir.CompositeExtract(tid, 1)},
ir.Imm32(16), ir.Imm32(8)),
IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6));
}
case SpecialRegister::SR_TID_X:
return ir.LocalInvocationIdX();
case SpecialRegister::SR_TID_Y:

View file

@ -67,10 +67,14 @@ add_library(video_core STATIC
renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_buffer_cache.h
renderer_opengl/gl_compute_program.cpp
renderer_opengl/gl_compute_program.h
renderer_opengl/gl_device.cpp
renderer_opengl/gl_device.h
renderer_opengl/gl_fence_manager.cpp
renderer_opengl/gl_fence_manager.h
renderer_opengl/gl_graphics_program.cpp
renderer_opengl/gl_graphics_program.h
renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_resource_manager.cpp

View file

@ -70,8 +70,8 @@ class BufferCache {
P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX;
static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX;
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr BufferId NULL_BUFFER_ID{0};
@ -154,7 +154,7 @@ public:
void UnbindGraphicsTextureBuffers(size_t stage);
void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size,
PixelFormat format, bool is_written);
PixelFormat format, bool is_written, bool is_image);
void UnbindComputeStorageBuffers();
@ -164,7 +164,7 @@ public:
void UnbindComputeTextureBuffers();
void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format,
bool is_written);
bool is_written, bool is_image);
void FlushCachedWrites();
@ -197,6 +197,7 @@ public:
[[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size);
std::mutex mutex;
Runtime& runtime;
private:
template <typename Func>
@ -366,7 +367,6 @@ private:
Tegra::Engines::KeplerCompute& kepler_compute;
Tegra::MemoryManager& gpu_memory;
Core::Memory::Memory& cpu_memory;
Runtime& runtime;
SlotVector<Buffer> slot_buffers;
DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
@ -394,8 +394,10 @@ private:
std::array<u32, NUM_STAGES> enabled_texture_buffers{};
std::array<u32, NUM_STAGES> written_texture_buffers{};
std::array<u32, NUM_STAGES> image_texture_buffers{};
u32 enabled_compute_texture_buffers = 0;
u32 written_compute_texture_buffers = 0;
u32 image_compute_texture_buffers = 0;
std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{};
@ -431,8 +433,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
Runtime& runtime_)
: rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} {
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
// Ensure the first slot is used for the null buffer
void(slot_buffers.insert(runtime, NullBufferParams{}));
deletion_iterator = slot_buffers.end();
@ -703,13 +705,18 @@ template <class P>
void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) {
enabled_texture_buffers[stage] = 0;
written_texture_buffers[stage] = 0;
image_texture_buffers[stage] = 0;
}
template <class P>
void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr,
u32 size, PixelFormat format, bool is_written) {
u32 size, PixelFormat format, bool is_written,
bool is_image) {
enabled_texture_buffers[stage] |= 1U << tbo_index;
written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index;
}
texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}
@ -717,6 +724,7 @@ template <class P>
void BufferCache<P>::UnbindComputeStorageBuffers() {
enabled_compute_storage_buffers = 0;
written_compute_storage_buffers = 0;
image_compute_texture_buffers = 0;
}
template <class P>
@ -737,13 +745,17 @@ template <class P>
void BufferCache<P>::UnbindComputeTextureBuffers() {
enabled_compute_texture_buffers = 0;
written_compute_texture_buffers = 0;
image_compute_texture_buffers = 0;
}
template <class P>
void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size,
PixelFormat format, bool is_written) {
PixelFormat format, bool is_written, bool is_image) {
enabled_compute_texture_buffers |= 1U << tbo_index;
written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index;
}
compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format);
}
@ -1057,7 +1069,6 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) {
template <class P>
void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
u32 binding_index = 0;
ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) {
const TextureBufferBinding& binding = texture_buffers[stage][index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@ -1066,9 +1077,12 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) {
runtime.BindTextureBuffer(binding_index, buffer, offset, size, format);
++binding_index;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((image_texture_buffers[stage] >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
}
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
}
@ -1139,7 +1153,6 @@ void BufferCache<P>::BindHostComputeStorageBuffers() {
template <class P>
void BufferCache<P>::BindHostComputeTextureBuffers() {
u32 binding_index = 0;
ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) {
const TextureBufferBinding& binding = compute_texture_buffers[index];
Buffer& buffer = slot_buffers[binding.buffer_id];
@ -1148,9 +1161,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() {
const u32 offset = buffer.Offset(binding.cpu_addr);
const PixelFormat format = binding.format;
if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) {
runtime.BindTextureBuffer(binding_index, buffer, offset, size, format);
++binding_index;
if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) {
if (((image_compute_texture_buffers >> index) & 1) != 0) {
runtime.BindImageBuffer(buffer, offset, size, format);
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
}
} else {
runtime.BindTextureBuffer(buffer, offset, size, format);
}
@ -1339,11 +1355,10 @@ void BufferCache<P>::UpdateComputeStorageBuffers() {
ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) {
// Resolve buffer
Binding& binding = compute_storage_buffers[index];
const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size);
binding.buffer_id = buffer_id;
binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size);
// Mark as written if needed
if (((written_compute_storage_buffers >> index) & 1) != 0) {
MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size);
MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size);
}
});
}

View file

@ -2,14 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <algorithm>
#include <span>
#include "video_core/buffer_cache/buffer_cache.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
namespace {
using VideoCore::Surface::PixelFormat;
struct BindlessSSBO {
GLuint64EXT address;
GLsizei length;
@ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept {
glMakeNamedBufferResidentNV(buffer.handle, access);
}
GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) {
return offset == view.offset && size == view.size && format == view.format;
})};
if (it != views.end()) {
return it->texture.handle;
}
OGLTexture texture;
texture.Create(GL_TEXTURE_BUFFER);
const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format};
glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size);
views.push_back({
.offset = offset,
.size = size,
.format = format,
.texture = std::move(texture),
});
return views.back().texture.handle;
}
BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()},
@ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0,
static_cast<GLsizeiptr>(size));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff
glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1,
reinterpret_cast<const GLuint*>(&ssbo));
} else {
const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer;
const GLuint base_binding = graphics_base_storage_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer,
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
}
void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
PixelFormat format) {
*texture_handles++ = buffer.View(offset, size, format);
}
void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) {
*image_handles++ = buffer.View(offset, size, format);
}
} // namespace OpenGL

View file

@ -32,6 +32,8 @@ public:
void MakeResident(GLenum access) noexcept;
[[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format);
[[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept {
return address;
}
@ -41,9 +43,17 @@ public:
}
private:
struct BufferView {
u32 offset;
u32 size;
VideoCore::Surface::PixelFormat format;
OGLTexture texture;
};
GLuint64EXT address = 0;
OGLBuffer buffer;
GLenum current_residency_access = GL_NONE;
std::vector<BufferView> views;
};
class BufferCacheRuntime {
@ -75,13 +85,19 @@ public:
void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size);
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
void BindImageBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format);
void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) {
if (use_assembly_shaders) {
const GLuint handle = fast_uniforms[stage][binding_index].handle;
const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size);
glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size);
} else {
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
fast_uniforms[stage][binding_index].handle, 0,
@ -103,7 +119,7 @@ public:
std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept {
const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size));
const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer;
const GLuint base_binding = graphics_base_uniform_bindings[stage];
const GLuint binding = base_binding + binding_index;
glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(),
static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size));
@ -118,6 +134,19 @@ public:
return has_fast_buffer_sub_data;
}
void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_uniform_bindings = bindings;
}
void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) {
graphics_base_storage_bindings = bindings;
}
void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) {
texture_handles = texture_handles_;
image_handles = image_handles_;
}
private:
static constexpr std::array PABO_LUT{
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
@ -133,6 +162,11 @@ private:
u32 max_attributes = 0;
std::array<GLuint, 5> graphics_base_uniform_bindings{};
std::array<GLuint, 5> graphics_base_storage_bindings{};
GLuint* texture_handles = nullptr;
GLuint* image_handles = nullptr;
std::optional<StreamBuffer> stream_buffer;
std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>,
@ -155,8 +189,8 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

View file

@ -0,0 +1,178 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "common/cityhash.h"
#include "video_core/renderer_opengl/gl_compute_program.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL {
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 16;
size_t ComputeProgramKey::Hash() const noexcept {
return static_cast<size_t>(
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this));
}
bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept {
return std::memcmp(this, &rhs, sizeof *this) == 0;
}
ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::KeplerCompute& kepler_compute_,
ProgramManager& program_manager_, OGLProgram program_,
const Shader::Info& info_)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
kepler_compute{kepler_compute_},
program_manager{program_manager_}, program{std::move(program_)}, info{info_} {
for (const auto& desc : info.texture_buffer_descriptors) {
num_texture_buffers += desc.count;
}
for (const auto& desc : info.image_buffer_descriptors) {
num_image_buffers += desc.count;
}
u32 num_textures = num_texture_buffers;
for (const auto& desc : info.texture_descriptors) {
num_textures += desc.count;
}
ASSERT(num_textures <= MAX_TEXTURES);
u32 num_images = num_image_buffers;
for (const auto& desc : info.image_descriptors) {
num_images += desc.count;
}
ASSERT(num_images <= MAX_IMAGES);
}
void ComputeProgram::Configure() {
buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask);
buffer_cache.UnbindComputeStorageBuffers();
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset,
desc.is_written);
++ssbo_index;
}
texture_cache.SynchronizeComputeDescriptors();
std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
std::array<GLuint, MAX_TEXTURES> samplers;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
GLsizei sampler_binding{};
GLsizei texture_binding{};
GLsizei image_binding{};
const auto& qmd{kepler_compute.launch_description};
const auto& cbufs{qmd.const_buffer_config};
const bool via_header_index{qmd.linked_tsc != 0};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset};
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0);
const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() +
secondary_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
}
}};
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
samplers[sampler_binding++] = 0;
}
}
std::ranges::for_each(info.image_buffer_descriptors, add_image);
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers[sampler_binding++] = sampler->Handle();
}
}
std::ranges::for_each(info.image_descriptors, add_image);
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
buffer_cache.UnbindComputeTextureBuffers();
size_t texbuf_index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])};
buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++texbuf_index;
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_buffer);
std::ranges::for_each(info.image_buffer_descriptors, add_buffer);
buffer_cache.UpdateComputeBuffers();
buffer_cache.runtime.SetImagePointers(textures.data(), images.data());
buffer_cache.BindHostComputeBuffers();
const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers};
texture_binding += num_texture_buffers;
image_binding += num_image_buffers;
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
textures[texture_binding++] = image_view.Handle(desc.type);
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
images[image_binding++] = image_view.Handle(desc.type);
}
}
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
glBindSamplers(0, sampler_binding, samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
program_manager.BindProgram(program.handle);
}
} // namespace OpenGL

View file

@ -0,0 +1,83 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <type_traits>
#include <utility>
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace Tegra {
class MemoryManager;
}
namespace Tegra::Engines {
class KeplerCompute;
}
namespace Shader {
struct Info;
}
namespace OpenGL {
class ProgramManager;
struct ComputeProgramKey {
u64 unique_hash;
u32 shared_memory_size;
std::array<u32, 3> workgroup_size;
size_t Hash() const noexcept;
bool operator==(const ComputeProgramKey&) const noexcept;
bool operator!=(const ComputeProgramKey& rhs) const noexcept {
return !operator==(rhs);
}
};
static_assert(std::has_unique_object_representations_v<ComputeProgramKey>);
static_assert(std::is_trivially_copyable_v<ComputeProgramKey>);
static_assert(std::is_trivially_constructible_v<ComputeProgramKey>);
class ComputeProgram {
public:
explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::KeplerCompute& kepler_compute_,
ProgramManager& program_manager_, OGLProgram program_,
const Shader::Info& info_);
void Configure();
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::KeplerCompute& kepler_compute;
ProgramManager& program_manager;
OGLProgram program;
Shader::Info info;
u32 num_texture_buffers{};
u32 num_image_buffers{};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::ComputeProgramKey> {
size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View file

@ -22,34 +22,11 @@
namespace OpenGL {
namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
constexpr u32 NumStages = 5;
constexpr std::array LIMIT_UBOS = {
GL_MAX_VERTEX_UNIFORM_BLOCKS, GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_FRAGMENT_UNIFORM_BLOCKS, GL_MAX_COMPUTE_UNIFORM_BLOCKS,
};
constexpr std::array LIMIT_SSBOS = {
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS, GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
};
constexpr std::array LIMIT_SAMPLERS = {
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_TEXTURE_IMAGE_UNITS,
GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS,
};
constexpr std::array LIMIT_IMAGES = {
GL_MAX_VERTEX_IMAGE_UNIFORMS, GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS,
GL_MAX_FRAGMENT_IMAGE_UNIFORMS, GL_MAX_COMPUTE_IMAGE_UNIFORMS,
};
template <typename T>
T GetInteger(GLenum pname) {
@ -82,15 +59,6 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view
return std::ranges::find(extensions, extension) != extensions.end();
}
u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) {
ASSERT(num >= amount);
if (limit) {
amount = std::min(amount, GetInteger<u32>(*limit));
}
num -= amount;
return std::exchange(base, base + amount);
}
std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept {
std::array<u32, Tegra::Engines::MaxShaderTypes> max;
std::ranges::transform(LIMIT_UBOS, max.begin(),
@ -98,62 +66,6 @@ std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcep
return max;
}
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept {
std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings;
static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4};
const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS);
const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS);
const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS);
u32 num_ubos = total_ubos - ReservedUniformBlocks;
u32 num_ssbos = total_ssbos;
u32 num_samplers = total_samplers;
u32 base_ubo = ReservedUniformBlocks;
u32 base_ssbo = 0;
u32 base_samplers = 0;
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
bindings[stage] = {
Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]),
Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]),
Extract(base_samplers, num_samplers, total_samplers / NumStages,
LIMIT_SAMPLERS[stage])};
}
u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS);
u32 base_images = 0;
// GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
// Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
// fragment stage, and at least 1 for the rest of the stages.
// So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
// Reserve at least 4 image bindings on the fragment stage.
bindings[4].image =
Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]);
// This is guaranteed to be at least 1.
const u32 total_extracted_images = num_images / (NumStages - 1);
// Reserve the other image bindings.
for (std::size_t i = 0; i < NumStages; ++i) {
const std::size_t stage = stage_swizzle[i];
if (stage == 4) {
continue;
}
bindings[stage].image =
Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]);
}
// Compute doesn't care about any of this.
bindings[5] = {0, 0, 0, 0};
return bindings;
}
bool IsASTCSupported() {
static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY};
static constexpr std::array formats = {
@ -225,7 +137,6 @@ Device::Device() {
}
max_uniform_buffers = BuildMaxUniformBuffers();
base_bindings = BuildBaseBindings();
uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);

View file

@ -12,13 +12,6 @@ namespace OpenGL {
class Device {
public:
struct BaseBindings {
u32 uniform_buffer{};
u32 shader_storage_buffer{};
u32 sampler{};
u32 image{};
};
explicit Device();
explicit Device(std::nullptr_t);
@ -28,14 +21,6 @@ public:
return max_uniform_buffers[static_cast<std::size_t>(shader_type)];
}
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
return base_bindings[stage_index];
}
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment;
}
@ -134,7 +119,6 @@ private:
std::string vendor_name;
std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{};
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{};
size_t uniform_buffer_alignment{};
size_t shader_storage_alignment{};
u32 max_vertex_attributes{};

View file

@ -0,0 +1,296 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <cstring>
#include "common/cityhash.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_graphics_program.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
using VideoCommon::ImageId;
constexpr u32 MAX_TEXTURES = 64;
constexpr u32 MAX_IMAGES = 8;
size_t GraphicsProgramKey::Hash() const noexcept {
return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size()));
}
bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept {
return std::memcmp(this, &rhs, Size()) == 0;
}
GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
OGLProgram program_,
const std::array<const Shader::Info*, 5>& infos)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_},
gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_},
state_tracker{state_tracker_}, program{std::move(program_)} {
std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
u32 num_textures{};
u32 num_images{};
for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) {
const auto& info{stage_infos[stage]};
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
for (const auto& desc : info.constant_buffer_descriptors) {
base_uniform_bindings[stage + 1] += desc.count;
}
for (const auto& desc : info.storage_buffers_descriptors) {
base_storage_bindings[stage + 1] += desc.count;
}
for (const auto& desc : info.texture_buffer_descriptors) {
num_texture_buffers[stage] += desc.count;
num_textures += desc.count;
}
for (const auto& desc : info.image_buffer_descriptors) {
num_image_buffers[stage] += desc.count;
num_images += desc.count;
}
for (const auto& desc : info.texture_descriptors) {
num_textures += desc.count;
}
for (const auto& desc : info.image_descriptors) {
num_images += desc.count;
}
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
}
struct Spec {
static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true};
static constexpr bool has_storage_buffers = true;
static constexpr bool has_texture_buffers = true;
static constexpr bool has_image_buffers = true;
static constexpr bool has_images = true;
};
void GraphicsProgram::Configure(bool is_indexed) {
std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids;
std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices;
std::array<GLuint, MAX_TEXTURES> samplers;
size_t image_view_index{};
GLsizei sampler_binding{};
texture_cache.SynchronizeGraphicsDescriptors();
buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings);
buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings);
const auto& regs{maxwell3d.regs};
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
const auto config_stage{[&](size_t stage) {
const Shader::Info& info{stage_infos[stage]};
buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask);
buffer_cache.UnbindGraphicsStorageBuffers(stage);
if constexpr (Spec::has_storage_buffers) {
size_t ssbo_index{};
for (const auto& desc : info.storage_buffers_descriptors) {
ASSERT(desc.count == 1);
buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index,
desc.cbuf_offset, desc.is_written);
++ssbo_index;
}
}
const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers};
const auto read_handle{[&](const auto& desc, u32 index) {
ASSERT(cbufs[desc.cbuf_index].enabled);
const u32 index_offset{index << desc.size_shift};
const u32 offset{desc.cbuf_offset + index_offset};
const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset};
if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> ||
std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) {
if (desc.has_secondary) {
ASSERT(cbufs[desc.secondary_cbuf_index].enabled);
const u32 second_offset{desc.secondary_cbuf_offset + index_offset};
const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address +
second_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
const u32 raw{lhs_raw | rhs_raw};
return TexturePair(raw, via_header_index);
}
}
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
}
}};
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
samplers[sampler_binding++] = 0;
}
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
add_image(desc);
}
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const auto handle{read_handle(desc, index)};
image_view_indices[image_view_index++] = handle.first;
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_binding++] = sampler->Handle();
}
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
add_image(desc);
}
}
}};
if constexpr (Spec::enabled_stages[0]) {
config_stage(0);
}
if constexpr (Spec::enabled_stages[1]) {
config_stage(1);
}
if constexpr (Spec::enabled_stages[2]) {
config_stage(2);
}
if constexpr (Spec::enabled_stages[3]) {
config_stage(3);
}
if constexpr (Spec::enabled_stages[4]) {
config_stage(4);
}
const std::span indices_span(image_view_indices.data(), image_view_index);
texture_cache.FillGraphicsImageViews(indices_span, image_view_ids);
ImageId* texture_buffer_index{image_view_ids.data()};
const auto bind_stage_info{[&](size_t stage) {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written, is_image);
++index;
++texture_buffer_index;
}
}};
const Shader::Info& info{stage_infos[stage]};
buffer_cache.UnbindGraphicsTextureBuffers(stage);
if constexpr (Spec::has_texture_buffers) {
for (const auto& desc : info.texture_buffer_descriptors) {
add_buffer(desc);
}
}
if constexpr (Spec::has_image_buffers) {
for (const auto& desc : info.image_buffer_descriptors) {
add_buffer(desc);
}
}
for (const auto& desc : info.texture_descriptors) {
texture_buffer_index += desc.count;
}
if constexpr (Spec::has_images) {
for (const auto& desc : info.image_descriptors) {
texture_buffer_index += desc.count;
}
}
}};
if constexpr (Spec::enabled_stages[0]) {
bind_stage_info(0);
}
if constexpr (Spec::enabled_stages[1]) {
bind_stage_info(1);
}
if constexpr (Spec::enabled_stages[2]) {
bind_stage_info(2);
}
if constexpr (Spec::enabled_stages[3]) {
bind_stage_info(3);
}
if constexpr (Spec::enabled_stages[4]) {
bind_stage_info(4);
}
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
const ImageId* views_it{image_view_ids.data()};
GLsizei texture_binding = 0;
GLsizei image_binding = 0;
std::array<GLuint, MAX_TEXTURES> textures;
std::array<GLuint, MAX_IMAGES> images;
const auto prepare_stage{[&](size_t stage) {
buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]);
buffer_cache.BindHostStageBuffers(stage);
texture_binding += num_texture_buffers[stage];
image_binding += num_image_buffers[stage];
const auto& info{stage_infos[stage]};
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
textures[texture_binding++] = image_view.Handle(desc.type);
}
}
for (const auto& desc : info.image_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
ImageView& image_view{texture_cache.GetImageView(*(views_it++))};
images[image_binding++] = image_view.Handle(desc.type);
}
}
}};
if constexpr (Spec::enabled_stages[0]) {
prepare_stage(0);
}
if constexpr (Spec::enabled_stages[1]) {
prepare_stage(1);
}
if constexpr (Spec::enabled_stages[2]) {
prepare_stage(2);
}
if constexpr (Spec::enabled_stages[3]) {
prepare_stage(3);
}
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
if (texture_binding != 0) {
ASSERT(texture_binding == sampler_binding);
glBindTextures(0, texture_binding, textures.data());
glBindSamplers(0, sampler_binding, samplers.data());
}
if (image_binding != 0) {
glBindImageTextures(0, image_binding, images.data());
}
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindProgram(program.handle);
}
} // namespace OpenGL

View file

@ -0,0 +1,105 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <array>
#include <type_traits>
#include <utility>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
class ProgramManager;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsProgramKey {
struct TransformFeedbackState {
struct Layout {
u32 stream;
u32 varying_count;
u32 stride;
};
std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
};
std::array<u64, 6> unique_hashes;
union {
u32 raw;
BitField<0, 1, u32> xfb_enabled;
BitField<1, 1, u32> early_z;
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive;
BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
};
std::array<u32, 3> padding;
TransformFeedbackState xfb_state;
size_t Hash() const noexcept;
bool operator==(const GraphicsProgramKey&) const noexcept;
bool operator!=(const GraphicsProgramKey& rhs) const noexcept {
return !operator==(rhs);
}
[[nodiscard]] size_t Size() const noexcept {
if (xfb_enabled != 0) {
return sizeof(GraphicsProgramKey);
} else {
return offsetof(GraphicsProgramKey, padding);
}
}
};
static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
class GraphicsProgram {
public:
explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_,
Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
OGLProgram program_, const std::array<const Shader::Info*, 5>& infos);
void Configure(bool is_indexed);
private:
TextureCache& texture_cache;
BufferCache& buffer_cache;
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
ProgramManager& program_manager;
StateTracker& state_tracker;
OGLProgram program;
std::array<Shader::Info, 5> stage_infos{};
std::array<u32, 5> base_uniform_bindings{};
std::array<u32, 5> base_storage_bindings{};
std::array<u32, 5> num_texture_buffers{};
std::array<u32, 5> num_image_buffers{};
};
} // namespace OpenGL
namespace std {
template <>
struct hash<OpenGL::GraphicsProgramKey> {
size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept {
return k.Hash();
}
};
} // namespace std

View file

@ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device),
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
buffer_cache, program_manager, state_tracker),
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
@ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
SyncState();
// Setup shaders and their used resources.
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()};
texture_cache.UpdateRenderTargets(false);
state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle());
program_manager.BindGraphicsPipeline();
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
program->Configure(is_indexed);
const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology);
BeginTransformFeedback(primitive_mode);
@ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
EndTransformFeedback();
++num_queued_commands;
@ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
}
void RasterizerOpenGL::DispatchCompute() {
UNREACHABLE_MSG("Not implemented");
ComputeProgram* const program{shader_cache.CurrentComputeProgram()};
if (!program) {
return;
}
program->Configure();
const auto& qmd{kepler_compute.launch_description};
glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z);
++num_queued_commands;
}
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
@ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
// ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
// ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
screen_info.display_texture = image_view->Handle(ImageViewType::e2D);
screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D);
screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format);
return true;
}

View file

@ -16,6 +16,11 @@
#include "common/scope_exit.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
#include "shader_recompiler/frontend/ir/program.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/frontend/maxwell/program.h"
#include "shader_recompiler/profile.h"
#include "video_core/engines/kepler_compute.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
@ -25,17 +30,281 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_cache.h"
#include "video_core/shader_environment.h"
#include "video_core/shader_notify.h"
namespace OpenGL {
namespace {
// FIXME: Move this somewhere else
const Shader::Profile profile{
.supported_spirv = 0x00010000,
.unified_descriptor_binding = false,
.support_descriptor_aliasing = false,
.support_int8 = false,
.support_int16 = false,
.support_vertex_instance_id = true,
.support_float_controls = false,
.support_separate_denorm_behavior = false,
.support_separate_rounding_mode = false,
.support_fp16_denorm_preserve = false,
.support_fp32_denorm_preserve = false,
.support_fp16_denorm_flush = false,
.support_fp32_denorm_flush = false,
.support_fp16_signed_zero_nan_preserve = false,
.support_fp32_signed_zero_nan_preserve = false,
.support_fp64_signed_zero_nan_preserve = false,
.support_explicit_workgroup_layout = false,
.support_vote = true,
.support_viewport_index_layer_non_geometry = true,
.support_viewport_mask = true,
.support_typeless_image_loads = true,
.support_demote_to_helper_invocation = false,
.warp_size_potentially_larger_than_guest = true,
.support_int64_atomics = false,
.lower_left_origin_mode = true,
.has_broken_spirv_clamp = true,
.has_broken_unsigned_image_offsets = true,
.has_broken_signed_operations = true,
.ignore_nan_fp_comparisons = true,
.generic_input_types = {},
.convert_depth_mode = false,
.force_early_z = false,
.tess_primitive = {},
.tess_spacing = {},
.tess_clockwise = false,
.input_topology = Shader::InputTopology::Triangles,
.fixed_state_point_size = std::nullopt,
.alpha_test_func = Shader::CompareFunction::Always,
.alpha_test_reference = 0.0f,
.y_negate = false,
.xfb_varyings = {},
};
using Shader::Backend::SPIRV::EmitSPIRV;
using Shader::Maxwell::TranslateProgram;
using VideoCommon::ComputeEnvironment;
using VideoCommon::GraphicsEnvironment;
template <typename Container>
auto MakeSpan(Container& container) {
return std::span(container.data(), container.size());
}
void AddShader(GLenum stage, GLuint program, std::span<const u32> code) {
OGLShader shader;
shader.handle = glCreateShader(stage);
glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(),
static_cast<GLsizei>(code.size_bytes()));
glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr);
glAttachShader(program, shader.handle);
if (!Settings::values.renderer_debug) {
return;
}
GLint shader_status{};
glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status);
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "Failed to build shader");
}
GLint log_length{};
glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data());
if (shader_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
}
void LinkProgram(GLuint program) {
glLinkProgram(program);
if (!Settings::values.renderer_debug) {
return;
}
GLint link_status{};
glGetProgramiv(program, GL_LINK_STATUS, &link_status);
GLint log_length{};
glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
if (log_length == 0) {
return;
}
std::string log(log_length, 0);
glGetProgramInfoLog(program, log_length, nullptr, log.data());
if (link_status == GL_FALSE) {
LOG_ERROR(Render_OpenGL, "{}", log);
} else {
LOG_WARNING(Render_OpenGL, "{}", log);
}
}
GLenum Stage(size_t stage_index) {
switch (stage_index) {
case 0:
return GL_VERTEX_SHADER;
case 1:
return GL_TESS_CONTROL_SHADER;
case 2:
return GL_TESS_EVALUATION_SHADER;
case 3:
return GL_GEOMETRY_SHADER;
case 4:
return GL_FRAGMENT_SHADER;
}
UNREACHABLE_MSG("{}", stage_index);
return GL_NONE;
}
} // Anonymous namespace
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_)
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_)
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{
state_tracker_} {}
ShaderCache::~ShaderCache() = default;
GraphicsProgram* ShaderCache::CurrentGraphicsProgram() {
if (!RefreshStages(graphics_key.unique_hashes)) {
return nullptr;
}
const auto& regs{maxwell3d.regs};
graphics_key.raw = 0;
graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
? regs.draw.topology.Value()
: Maxwell::PrimitiveTopology{});
graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value());
graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value());
graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value());
const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)};
auto& program{pair->second};
if (is_new) {
program = CreateGraphicsProgram();
}
return program.get();
}
ComputeProgram* ShaderCache::CurrentComputeProgram() {
const VideoCommon::ShaderInfo* const shader{ComputeShader()};
if (!shader) {
return nullptr;
}
const auto& qmd{kepler_compute.launch_description};
const ComputeProgramKey key{
.unique_hash = shader->unique_hash,
.shared_memory_size = qmd.shared_alloc,
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
};
const auto [pair, is_new]{compute_cache.try_emplace(key)};
auto& pipeline{pair->second};
if (!is_new) {
return pipeline.get();
}
pipeline = CreateComputeProgram(key, shader);
return pipeline.get();
}
std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram() {
GraphicsEnvironments environments;
GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
main_pools.ReleaseContents();
return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true);
}
std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram(
ShaderPools& pools, const GraphicsProgramKey& key, std::span<Shader::Environment* const> envs,
bool build_in_parallel) {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
size_t env_index{0};
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
if (key.unique_hashes[index] == 0) {
continue;
}
Shader::Environment& env{*envs[env_index]};
++env_index;
const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))};
Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset);
programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg);
}
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
OGLProgram gl_program;
gl_program.handle = glCreateProgram();
Shader::Backend::SPIRV::Bindings binding;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
if (key.unique_hashes[index] == 0) {
continue;
}
UNIMPLEMENTED_IF(index == 0);
Shader::IR::Program& program{programs[index]};
const size_t stage_index{index - 1};
infos[stage_index] = &program.info;
const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
FILE* file = fopen("D:\\shader.spv", "wb");
fwrite(code.data(), 4, code.size(), file);
fclose(file);
AddShader(Stage(stage_index), gl_program.handle, code);
}
LinkProgram(gl_program.handle);
return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d,
program_manager, state_tracker, std::move(gl_program),
infos);
}
std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(
const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) {
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
const auto& qmd{kepler_compute.launch_description};
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
env.SetCachedSize(shader->size_bytes);
main_pools.ReleaseContents();
return CreateComputeProgram(main_pools, key, env, true);
}
std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(ShaderPools& pools,
const ComputeProgramKey& key,
Shader::Environment& env,
bool build_in_parallel) {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)};
Shader::Backend::SPIRV::Bindings binding;
const std::vector<u32> code{EmitSPIRV(profile, program, binding)};
OGLProgram gl_program;
gl_program.handle = glCreateProgram();
AddShader(GL_COMPUTE_SHADER, gl_program.handle, code);
LinkProgram(gl_program.handle);
return std::make_unique<ComputeProgram>(texture_cache, buffer_cache, gpu_memory, kepler_compute,
program_manager, std::move(gl_program), program.info);
}
} // namespace OpenGL

View file

@ -5,20 +5,18 @@
#pragma once
#include <array>
#include <atomic>
#include <bitset>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <glad/glad.h>
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_compute_program.h"
#include "video_core/renderer_opengl/gl_graphics_program.h"
#include "video_core/shader_cache.h"
namespace Tegra {
@ -32,64 +30,62 @@ class EmuWindow;
namespace OpenGL {
class Device;
class ProgramManager;
class RasterizerOpenGL;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsProgramKey {
struct TransformFeedbackState {
struct Layout {
u32 stream;
u32 varying_count;
u32 stride;
};
std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
};
std::array<u64, 6> unique_hashes;
std::array<u8, Maxwell::NumRenderTargets> color_formats;
union {
u32 raw;
BitField<0, 1, u32> xfb_enabled;
BitField<1, 1, u32> early_z;
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
BitField<6, 2, u32> tessellation_primitive;
BitField<8, 2, u32> tessellation_spacing;
BitField<10, 1, u32> tessellation_clockwise;
};
u32 padding;
TransformFeedbackState xfb_state;
[[nodiscard]] size_t Size() const noexcept {
if (xfb_enabled != 0) {
return sizeof(GraphicsProgramKey);
} else {
return offsetof(GraphicsProgramKey, padding);
}
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
class GraphicsProgram {
public:
private:
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
class ShaderCache : public VideoCommon::ShaderCache {
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_);
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_);
~ShaderCache();
[[nodiscard]] GraphicsProgram* CurrentGraphicsProgram();
[[nodiscard]] ComputeProgram* CurrentComputeProgram();
private:
std::unique_ptr<GraphicsProgram> CreateGraphicsProgram();
std::unique_ptr<GraphicsProgram> CreateGraphicsProgram(
ShaderPools& pools, const GraphicsProgramKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel);
std::unique_ptr<ComputeProgram> CreateComputeProgram(const ComputeProgramKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputeProgram> CreateComputeProgram(ShaderPools& pools,
const ComputeProgramKey& key,
Shader::Environment& env,
bool build_in_parallel);
Core::Frontend::EmuWindow& emu_window;
Tegra::GPU& gpu;
const Device& device;
TextureCache& texture_cache;
BufferCache& buffer_cache;
ProgramManager& program_manager;
StateTracker& state_tracker;
GraphicsProgramKey graphics_key{};
ShaderPools main_pools;
std::unordered_map<GraphicsProgramKey, std::unique_ptr<GraphicsProgram>> graphics_cache;
std::unordered_map<ComputeProgramKey, std::unique_ptr<ComputeProgram>> compute_cache;
};
} // namespace OpenGL

View file

@ -1,149 +1,3 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <glad/glad.h>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL {
namespace {
void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) {
if (current == old) {
return;
}
if (current == 0) {
if (enabled) {
enabled = false;
glDisable(stage);
}
return;
}
if (!enabled) {
enabled = true;
glEnable(stage);
}
glBindProgramARB(stage, current);
}
} // Anonymous namespace
ProgramManager::ProgramManager(const Device& device)
: use_assembly_programs{device.UseAssemblyShaders()} {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
} else {
graphics_pipeline.Create();
glBindProgramPipeline(graphics_pipeline.handle);
}
}
ProgramManager::~ProgramManager() = default;
void ProgramManager::BindCompute(GLuint program) {
if (use_assembly_programs) {
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
} else {
is_graphics_bound = false;
glUseProgram(program);
}
}
void ProgramManager::BindGraphicsPipeline() {
if (!use_assembly_programs) {
UpdateSourcePrograms();
}
}
void ProgramManager::BindHostPipeline(GLuint pipeline) {
if (use_assembly_programs) {
if (geometry_enabled) {
geometry_enabled = false;
old_state.geometry = 0;
glDisable(GL_GEOMETRY_PROGRAM_NV);
}
} else {
if (!is_graphics_bound) {
glUseProgram(0);
}
}
glBindProgramPipeline(pipeline);
}
void ProgramManager::RestoreGuestPipeline() {
if (use_assembly_programs) {
glBindProgramPipeline(0);
} else {
glBindProgramPipeline(graphics_pipeline.handle);
}
}
void ProgramManager::BindHostCompute(GLuint program) {
if (use_assembly_programs) {
glDisable(GL_COMPUTE_PROGRAM_NV);
}
glUseProgram(program);
is_graphics_bound = false;
}
void ProgramManager::RestoreGuestCompute() {
if (use_assembly_programs) {
glEnable(GL_COMPUTE_PROGRAM_NV);
glUseProgram(0);
}
}
void ProgramManager::UseVertexShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled);
}
current_state.vertex = program;
}
void ProgramManager::UseGeometryShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled);
}
current_state.geometry = program;
}
void ProgramManager::UseFragmentShader(GLuint program) {
if (use_assembly_programs) {
BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled);
}
current_state.fragment = program;
}
void ProgramManager::UpdateSourcePrograms() {
if (!is_graphics_bound) {
is_graphics_bound = true;
glUseProgram(0);
}
const GLuint handle = graphics_pipeline.handle;
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
if (current == old) {
return;
}
glUseProgramStages(handle, stage, current);
};
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
old_state = current_state;
}
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
}
} // namespace OpenGL

View file

@ -4,79 +4,24 @@
#pragma once
#include <cstddef>
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL {
class Device;
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
/// the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
/// Not following that rule will cause problems on some AMD drivers.
struct alignas(16) MaxwellUniformData {
void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
GLfloat y_direction;
};
static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
class ProgramManager {
public:
explicit ProgramManager(const Device& device);
~ProgramManager();
void BindProgram(GLuint program) {
if (bound_program == program) {
return;
}
bound_program = program;
glUseProgram(program);
}
/// Binds a compute program
void BindCompute(GLuint program);
/// Updates bound programs.
void BindGraphicsPipeline();
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
void BindHostPipeline(GLuint pipeline);
/// Rewinds BindHostPipeline state changes.
void RestoreGuestPipeline();
/// Binds an OpenGL GLSL program object unsynchronized with the guest state.
void BindHostCompute(GLuint program);
/// Rewinds BindHostCompute state changes.
void RestoreGuestCompute();
void UseVertexShader(GLuint program);
void UseGeometryShader(GLuint program);
void UseFragmentShader(GLuint program);
void RestoreGuestCompute() {}
private:
struct PipelineState {
GLuint vertex = 0;
GLuint geometry = 0;
GLuint fragment = 0;
};
/// Update GLSL programs.
void UpdateSourcePrograms();
OGLPipeline graphics_pipeline;
PipelineState current_state;
PipelineState old_state;
bool use_assembly_programs = false;
bool is_graphics_bound = true;
bool vertex_enabled = false;
bool geometry_enabled = false;
bool fragment_enabled = false;
GLuint bound_program = 0;
};
} // namespace OpenGL

View file

@ -24,9 +24,7 @@
#include "video_core/textures/decoders.h"
namespace OpenGL {
namespace {
using Tegra::Texture::SwizzleSource;
using Tegra::Texture::TextureMipmapFilter;
using Tegra::Texture::TextureType;
@ -59,107 +57,6 @@ struct CopyRegion {
GLsizei depth;
};
struct FormatTuple {
GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
constexpr std::array ACCELERATED_FORMATS{
GL_RGBA32F, GL_RGBA16F, GL_RG32F, GL_RG16F, GL_R11F_G11F_B10F, GL_R32F,
GL_R16F, GL_RGBA32UI, GL_RGBA16UI, GL_RGB10_A2UI, GL_RGBA8UI, GL_RG32UI,
@ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{
GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM,
};
const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
switch (info.type) {
case ImageType::e1D:
@ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) {
return GL_NONE;
}
GLenum ImageTarget(ImageViewType type, int num_samples = 1) {
GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) {
const bool is_multisampled = num_samples > 1;
switch (type) {
case ImageViewType::e1D:
case Shader::TextureType::Color1D:
return GL_TEXTURE_1D;
case ImageViewType::e2D:
case Shader::TextureType::Color2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D;
case ImageViewType::Cube:
case Shader::TextureType::ColorCube:
return GL_TEXTURE_CUBE_MAP;
case ImageViewType::e3D:
case Shader::TextureType::Color3D:
return GL_TEXTURE_3D;
case ImageViewType::e1DArray:
case Shader::TextureType::ColorArray1D:
return GL_TEXTURE_1D_ARRAY;
case ImageViewType::e2DArray:
case Shader::TextureType::ColorArray2D:
return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY;
case ImageViewType::CubeArray:
case Shader::TextureType::ColorArrayCube:
return GL_TEXTURE_CUBE_MAP_ARRAY;
case ImageViewType::Rect:
return GL_TEXTURE_RECTANGLE;
case ImageViewType::Buffer:
case Shader::TextureType::Buffer:
return GL_TEXTURE_BUFFER;
}
UNREACHABLE_MSG("Invalid image view type={}", type);
@ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
default:
return false;
}
const GLenum internal_format = GetFormatTuple(info.format).internal_format;
const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format;
const auto& format_info = runtime.FormatInfo(info.type, internal_format);
if (format_info.is_compressed) {
return false;
@ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) {
const GLuint texture = image_view->DefaultHandle();
glNamedFramebufferTexture(fbo, attachment, texture, 0);
glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0);
return;
}
const GLuint texture = image_view->Handle(ImageViewType::e3D);
const GLuint texture = image_view->Handle(Shader::TextureType::Color3D);
if (image_view->range.extent.layers > 1) {
// TODO: OpenGL doesn't support rendering to a fixed number of slices
glNamedFramebufferTexture(fbo, attachment, texture, 0);
@ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i];
for (const FormatTuple& tuple : FORMAT_TABLE) {
for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) {
const GLenum format = tuple.internal_format;
GLint compat_class;
GLint compat_type;
@ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY);
null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY);
null_image_3d.Create(GL_TEXTURE_3D);
null_image_rect.Create(GL_TEXTURE_RECTANGLE);
glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1);
glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6);
glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1);
glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1);
std::array<GLuint, 4> new_handles;
glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data());
@ -496,29 +383,28 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager&
glTextureView(null_image_view_cube.handle, GL_TEXTURE_CUBE_MAP, null_image_cube_array.handle,
GL_R8, 0, 1, 0, 6);
const std::array texture_handles{
null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
null_image_rect.handle, null_image_view_1d.handle, null_image_view_2d.handle,
null_image_view_2d_array.handle, null_image_view_cube.handle,
null_image_1d_array.handle, null_image_cube_array.handle, null_image_3d.handle,
null_image_view_1d.handle, null_image_view_2d.handle, null_image_view_2d_array.handle,
null_image_view_cube.handle,
};
for (const GLuint handle : texture_handles) {
static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO};
glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data());
}
const auto set_view = [this](ImageViewType type, GLuint handle) {
const auto set_view = [this](Shader::TextureType type, GLuint handle) {
if (device.HasDebuggingToolAttached()) {
const std::string name = fmt::format("NullImage {}", type);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
null_image_views[static_cast<size_t>(type)] = handle;
};
set_view(ImageViewType::e1D, null_image_view_1d.handle);
set_view(ImageViewType::e2D, null_image_view_2d.handle);
set_view(ImageViewType::Cube, null_image_view_cube.handle);
set_view(ImageViewType::e3D, null_image_3d.handle);
set_view(ImageViewType::e1DArray, null_image_1d_array.handle);
set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle);
set_view(ImageViewType::CubeArray, null_image_cube_array.handle);
set_view(ImageViewType::Rect, null_image_rect.handle);
set_view(Shader::TextureType::Color1D, null_image_view_1d.handle);
set_view(Shader::TextureType::Color2D, null_image_view_2d.handle);
set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle);
set_view(Shader::TextureType::Color3D, null_image_3d.handle);
set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle);
set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle);
set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle);
}
TextureCacheRuntime::~TextureCacheRuntime() = default;
@ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
gl_format = GL_RGBA;
gl_type = GL_UNSIGNED_INT_8_8_8_8_REV;
} else {
const auto& tuple = GetFormatTuple(info.format);
const auto& tuple = MaxwellToGL::GetFormatTuple(info.format);
gl_internal_format = tuple.internal_format;
gl_format = tuple.format;
gl_type = tuple.type;
@ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_,
glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth);
break;
case GL_TEXTURE_BUFFER:
buffer.Create();
glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0);
UNREACHABLE();
break;
default:
UNREACHABLE_MSG("Invalid target=0x{:x}", target);
@ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map,
}
}
void Image::UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset,
copy.dst_offset, copy.size);
}
}
void Image::DownloadMemory(ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) {
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
@ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
if (True(image.flags & ImageFlagBits::Converted)) {
internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} else {
internal_format = GetFormatTuple(format).internal_format;
internal_format = MaxwellToGL::GetFormatTuple(format).internal_format;
}
VideoCommon::SubresourceRange flatten_range = info.range;
std::array<GLuint, 2> handles;
@ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
[[fallthrough]];
case ImageViewType::e1D:
glGenTextures(2, handles.data());
SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range);
SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range);
SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range);
SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range);
break;
case ImageViewType::e2DArray:
flatten_range.extent.layers = 1;
@ -985,37 +862,65 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
.extent = {.levels = 1, .layers = 1},
};
glGenTextures(1, handles.data());
SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range);
break;
SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range);
} else {
glGenTextures(2, handles.data());
SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range);
SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info,
info.range);
}
glGenTextures(2, handles.data());
SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range);
SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range);
break;
case ImageViewType::e3D:
glGenTextures(1, handles.data());
SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range);
SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range);
break;
case ImageViewType::CubeArray:
flatten_range.extent.layers = 6;
[[fallthrough]];
case ImageViewType::Cube:
glGenTextures(2, handles.data());
SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range);
SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range);
SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range);
SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range);
break;
case ImageViewType::Rect:
glGenTextures(1, handles.data());
SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range);
UNIMPLEMENTED();
break;
case ImageViewType::Buffer:
glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data());
SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range);
UNREACHABLE();
break;
}
switch (info.type) {
case ImageViewType::e1D:
default_handle = Handle(Shader::TextureType::Color1D);
break;
case ImageViewType::e1DArray:
default_handle = Handle(Shader::TextureType::ColorArray1D);
break;
case ImageViewType::e2D:
default_handle = Handle(Shader::TextureType::Color2D);
break;
case ImageViewType::e2DArray:
default_handle = Handle(Shader::TextureType::ColorArray2D);
break;
case ImageViewType::e3D:
default_handle = Handle(Shader::TextureType::Color3D);
break;
case ImageViewType::Cube:
default_handle = Handle(Shader::TextureType::ColorCube);
break;
case ImageViewType::CubeArray:
default_handle = Handle(Shader::TextureType::ColorArrayCube);
break;
default:
break;
}
default_handle = Handle(info.type);
}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_)
: VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_},
buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {}
ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info)
: VideoCommon::ImageViewBase{info, view_info} {}
@ -1023,24 +928,18 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params)
: VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {}
void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type,
void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type,
GLuint handle, const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range) {
if (info.type == ImageViewType::Buffer) {
// TODO: Take offset from buffer cache
glTextureBufferRange(handle, internal_format, image.buffer.handle, 0,
image.guest_size_bytes);
} else {
const GLuint parent = image.texture.handle;
const GLenum target = ImageTarget(view_type, image.info.num_samples);
glTextureView(handle, target, parent, internal_format, view_range.base.level,
view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
if (!info.IsRenderTarget()) {
ApplySwizzle(handle, format, info.Swizzle());
}
const GLuint parent = image.texture.handle;
const GLenum target = ImageTarget(view_type, image.info.num_samples);
glTextureView(handle, target, parent, internal_format, view_range.base.level,
view_range.extent.levels, view_range.base.layer, view_range.extent.layers);
if (!info.IsRenderTarget()) {
ApplySwizzle(handle, format, info.Swizzle());
}
if (device.HasDebuggingToolAttached()) {
const std::string name = VideoCommon::Name(*this, view_type);
const std::string name = VideoCommon::Name(*this);
glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data());
}
stored_views.emplace_back().handle = handle;

View file

@ -9,6 +9,7 @@
#include <glad/glad.h>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/texture_cache.h"
@ -127,13 +128,12 @@ private:
OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array;
OGLTexture null_image_3d;
OGLTexture null_image_rect;
OGLTextureView null_image_view_1d;
OGLTextureView null_image_view_2d;
OGLTextureView null_image_view_2d_array;
OGLTextureView null_image_view_cube;
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views;
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{};
};
class Image : public VideoCommon::ImageBase {
@ -154,8 +154,6 @@ public:
void UploadMemory(const ImageBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept;
@ -170,7 +168,6 @@ private:
void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset);
OGLTexture texture;
OGLBuffer buffer;
OGLTextureView store_view;
GLenum gl_internal_format = GL_NONE;
GLenum gl_format = GL_NONE;
@ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase {
public:
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&,
const VideoCommon::ImageViewInfo&, GPUVAddr);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info,
const VideoCommon::ImageViewInfo& view_info);
explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&);
[[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept {
return views[static_cast<size_t>(query_type)];
[[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept {
return views[static_cast<size_t>(handle_type)];
}
[[nodiscard]] GLuint DefaultHandle() const noexcept {
@ -198,15 +197,25 @@ public:
return internal_format;
}
[[nodiscard]] GPUVAddr GpuAddr() const noexcept {
return gpu_addr;
}
[[nodiscard]] u32 BufferSize() const noexcept {
return buffer_size;
}
private:
void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle,
void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle,
const VideoCommon::ImageViewInfo& info,
VideoCommon::SubresourceRange view_range);
std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{};
std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{};
std::vector<OGLTextureView> stored_views;
GLuint default_handle = 0;
GLenum internal_format = GL_NONE;
GLuint default_handle = 0;
GPUVAddr gpu_addr = 0;
u32 buffer_size = 0;
};
class ImageAlloc : public VideoCommon::ImageAllocBase {};

View file

@ -5,12 +5,120 @@
#pragma once
#include <glad/glad.h>
#include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
namespace OpenGL::MaxwellToGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct FormatTuple {
GLenum internal_format;
GLenum format = GL_NONE;
GLenum type = GL_NONE;
};
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_UNORM
{GL_RGBA8_SNORM, GL_RGBA, GL_BYTE}, // A8B8G8R8_SNORM
{GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE}, // A8B8G8R8_SINT
{GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE}, // A8B8G8R8_UINT
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, // R5G6B5_UNORM
{GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV}, // B5G6R5_UNORM
{GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1R5G5B5_UNORM
{GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UNORM
{GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
{GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, // A1B5G5R5_UNORM
{GL_R8, GL_RED, GL_UNSIGNED_BYTE}, // R8_UNORM
{GL_R8_SNORM, GL_RED, GL_BYTE}, // R8_SNORM
{GL_R8I, GL_RED_INTEGER, GL_BYTE}, // R8_SINT
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE}, // R8_UINT
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16A16_FLOAT
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT}, // R16G16B16A16_UNORM
{GL_RGBA16_SNORM, GL_RGBA, GL_SHORT}, // R16G16B16A16_SNORM
{GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT}, // R16G16B16A16_SINT
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT}, // R16G16B16A16_UINT
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV}, // B10G11R11_FLOAT
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT}, // R32G32B32A32_UINT
{GL_COMPRESSED_RGBA_S3TC_DXT1_EXT}, // BC1_RGBA_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT3_EXT}, // BC2_UNORM
{GL_COMPRESSED_RGBA_S3TC_DXT5_EXT}, // BC3_UNORM
{GL_COMPRESSED_RED_RGTC1}, // BC4_UNORM
{GL_COMPRESSED_SIGNED_RED_RGTC1}, // BC4_SNORM
{GL_COMPRESSED_RG_RGTC2}, // BC5_UNORM
{GL_COMPRESSED_SIGNED_RG_RGTC2}, // BC5_SNORM
{GL_COMPRESSED_RGBA_BPTC_UNORM}, // BC7_UNORM
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
{GL_RG32I, GL_RG_INTEGER, GL_INT}, // R32G32_SINT
{GL_R32F, GL_RED, GL_FLOAT}, // R32_FLOAT
{GL_R16F, GL_RED, GL_HALF_FLOAT}, // R16_FLOAT
{GL_R16, GL_RED, GL_UNSIGNED_SHORT}, // R16_UNORM
{GL_R16_SNORM, GL_RED, GL_SHORT}, // R16_SNORM
{GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT}, // R16_UINT
{GL_R16I, GL_RED_INTEGER, GL_SHORT}, // R16_SINT
{GL_RG16, GL_RG, GL_UNSIGNED_SHORT}, // R16G16_UNORM
{GL_RG16F, GL_RG, GL_HALF_FLOAT}, // R16G16_FLOAT
{GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT}, // R16G16_UINT
{GL_RG16I, GL_RG_INTEGER, GL_SHORT}, // R16G16_SINT
{GL_RG16_SNORM, GL_RG, GL_SHORT}, // R16G16_SNORM
{GL_RGB32F, GL_RGB, GL_FLOAT}, // R32G32B32_FLOAT
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, // A8B8G8R8_SRGB
{GL_RG8, GL_RG, GL_UNSIGNED_BYTE}, // R8G8_UNORM
{GL_RG8_SNORM, GL_RG, GL_BYTE}, // R8G8_SNORM
{GL_RG8I, GL_RG_INTEGER, GL_BYTE}, // R8G8_SINT
{GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE}, // R8G8_UINT
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT}, // R32G32_UINT
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT}, // R16G16B16X16_FLOAT
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT}, // R32_UINT
{GL_R32I, GL_RED_INTEGER, GL_INT}, // R32_SINT
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
{GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM}, // BC7_SRGB
{GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, // A4B4G4R4_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR}, // ASTC_2D_4X4_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR}, // ASTC_2D_8X8_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR}, // ASTC_2D_8X5_SRGB
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR}, // ASTC_2D_5X4_SRGB
{GL_COMPRESSED_RGBA_ASTC_5x5_KHR}, // ASTC_2D_5X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR}, // ASTC_2D_5X5_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x8_KHR}, // ASTC_2D_10X8_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR}, // ASTC_2D_10X8_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x6_KHR}, // ASTC_2D_6X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR}, // ASTC_2D_6X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_10x10_KHR}, // ASTC_2D_10X10_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR}, // ASTC_2D_10X10_SRGB
{GL_COMPRESSED_RGBA_ASTC_12x12_KHR}, // ASTC_2D_12X12_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR}, // ASTC_2D_12X12_SRGB
{GL_COMPRESSED_RGBA_ASTC_8x6_KHR}, // ASTC_2D_8X6_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR}, // ASTC_2D_8X6_SRGB
{GL_COMPRESSED_RGBA_ASTC_6x5_KHR}, // ASTC_2D_6X5_UNORM
{GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR}, // ASTC_2D_6X5_SRGB
{GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV}, // E5B9G9R9_FLOAT
{GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT}, // D32_FLOAT
{GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT}, // D16_UNORM
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // D24_UNORM_S8_UINT
{GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8}, // S8_UINT_D24_UNORM
{GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL,
GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
}};
inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) {
ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size());
return FORMAT_TABLE[static_cast<size_t>(pixel_format)];
}
inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) {
switch (attrib.type) {
case Maxwell::VertexAttribute::Type::UnsignedNorm:

View file

@ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_,
std::unique_ptr<Core::Frontend::GraphicsContext> context_)
: RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_},
emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu},
program_manager{device},
rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) {
if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
@ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() {
OGLShader fragment_shader;
fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER);
vertex_program.Create(true, false, vertex_shader.handle);
fragment_program.Create(true, false, fragment_shader.handle);
pipeline.Create();
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle);
// Generate presentation sampler
present_sampler.Create();
@ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
// Set projection matrix
const std::array ortho_matrix =
MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
std::data(ortho_matrix));
program_manager.BindProgram(present_program.handle);
glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
@ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
state_tracker.NotifyClipControl();
state_tracker.NotifyAlphaTest();
program_manager.BindHostPipeline(pipeline.handle);
state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
glEnable(GL_CULL_FACE);
if (screen_info.display_srgb) {
@ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
glClear(GL_COLOR_BUFFER_BIT);
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
program_manager.RestoreGuestPipeline();
// TODO
// program_manager.RestoreGuestPipeline();
}
void RendererOpenGL::RenderScreenshot() {

View file

@ -12,7 +12,6 @@
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
@ -111,9 +110,7 @@ private:
// OpenGL object IDs
OGLSampler present_sampler;
OGLBuffer vertex_buffer;
OGLProgram vertex_program;
OGLProgram fragment_program;
OGLPipeline pipeline;
OGLProgram present_program;
OGLFramebuffer screenshot_framebuffer;
// GPU address of the vertex buffer

View file

@ -16,7 +16,6 @@
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h"
@ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
.width = VideoCore::Surface::DefaultBlockWidth(image.info.format),
.height = VideoCore::Surface::DefaultBlockHeight(image.info.format),
};
program_manager.BindHostCompute(astc_decoder_program.handle);
program_manager.BindProgram(astc_decoder_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle);
@ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_INPUT_BUFFER = 1;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle);
program_manager.BindProgram(block_linear_unswizzle_2d_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
@ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle);
program_manager.BindProgram(block_linear_unswizzle_3d_program.handle);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle);
const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format));
@ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map,
UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block),
"Non-power of two images are not implemented");
program_manager.BindHostCompute(pitch_unswizzle_program.handle);
program_manager.BindProgram(pitch_unswizzle_program.handle);
glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes);
glUniform2ui(LOC_ORIGIN, 0, 0);
glUniform2i(LOC_DESTINATION, 0, 0);
@ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
static constexpr GLuint LOC_SRC_OFFSET = 0;
static constexpr GLuint LOC_DST_OFFSET = 1;
program_manager.BindHostCompute(copy_bc4_program.handle);
program_manager.BindProgram(copy_bc4_program.handle);
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_subresource.base_layer == 0);
@ -286,7 +285,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
break;
case 4: {
// BGRA8 copy
program_manager.BindHostCompute(copy_bgra_program.handle);
program_manager.BindProgram(copy_bgra_program.handle);
constexpr GLenum FORMAT = GL_RGBA8;
for (const ImageCopy& copy : copies) {
ASSERT(copy.src_offset == zero_offset);

View file

@ -19,23 +19,6 @@
namespace Vulkan {
struct TextureHandle {
explicit TextureHandle(u32 data, bool via_header_index) {
[[likely]] if (via_header_index) {
image = data;
sampler = data;
}
else {
const Tegra::Texture::TextureHandle handle{data};
image = handle.tic_id;
sampler = via_header_index ? image : handle.tsc_id.Value();
}
}
u32 image;
u32 sampler;
};
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {}

View file

@ -140,8 +140,8 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = false;
static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false;
static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
};
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

View file

@ -18,6 +18,9 @@
namespace Vulkan {
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool,
VKUpdateDescriptorQueue& update_descriptor_queue_,
Common::ThreadWorker* thread_worker, const Shader::Info& info_,
@ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
secondary_offset};
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
return TextureHandle{lhs_raw | rhs_raw, via_header_index};
return TexturePair(lhs_raw | rhs_raw, via_header_index);
}
}
return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index};
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
const TextureHandle handle{read_handle(desc, index)};
image_view_indices.push_back(handle.image);
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
}
}};
std::ranges::for_each(info.texture_buffer_descriptors, add_image);
std::ranges::for_each(info.image_buffer_descriptors, add_image);
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const TextureHandle handle{read_handle(desc, index)};
image_view_indices.push_back(handle.image);
const auto handle{read_handle(desc, index)};
image_view_indices.push_back(handle.first);
Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
Sampler* const sampler = texture_cache.GetComputeSampler(handle.second);
samplers.push_back(sampler->Handle());
}
}
@ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
ImageId* texture_buffer_ids{image_view_ids.data()};
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) {
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids);
buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written);
is_written, is_image);
++texture_buffer_ids;
++index;
}

View file

@ -19,7 +19,7 @@
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/vulkan_common/vulkan_device.h"
#ifdef _MSC_VER
#if defined(_MSC_VER) && defined(NDEBUG)
#define LAMBDA_FORCEINLINE [[msvc::forceinline]]
#else
#define LAMBDA_FORCEINLINE
@ -30,6 +30,7 @@ namespace {
using boost::container::small_vector;
using boost::container::static_vector;
using Shader::ImageBufferDescriptor;
using Tegra::Texture::TexturePair;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::PixelFormatFromDepthFormat;
using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
@ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const u32 lhs_raw{gpu_memory.Read<u32>(addr)};
const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)};
const u32 raw{lhs_raw | rhs_raw};
return TextureHandle{raw, via_header_index};
return TexturePair(raw, via_header_index);
}
}
return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index};
return TexturePair(gpu_memory.Read<u32>(addr), via_header_index);
}};
const auto add_image{[&](const auto& desc) {
for (u32 index = 0; index < desc.count; ++index) {
const TextureHandle handle{read_handle(desc, index)};
image_view_indices[image_index++] = handle.image;
const auto handle{read_handle(desc, index)};
image_view_indices[image_index++] = handle.first;
}
}};
if constexpr (Spec::has_texture_buffers) {
@ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
}
for (const auto& desc : info.texture_descriptors) {
for (u32 index = 0; index < desc.count; ++index) {
const TextureHandle handle{read_handle(desc, index)};
image_view_indices[image_index++] = handle.image;
const auto handle{read_handle(desc, index)};
image_view_indices[image_index++] = handle.first;
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)};
Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)};
samplers[sampler_index++] = sampler->Handle();
}
}
@ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) {
const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE {
size_t index{};
const auto add_buffer{[&](const auto& desc) {
constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>;
for (u32 i = 0; i < desc.count; ++i) {
bool is_written{false};
if constexpr (std::is_same_v<decltype(desc), const ImageBufferDescriptor&>) {
if constexpr (is_image) {
is_written = desc.is_written;
}
ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)};
buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(),
image_view.BufferSize(), image_view.format,
is_written);
is_written, is_image);
++index;
++texture_buffer_index;
}

View file

@ -342,28 +342,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
}
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
GraphicsEnvironments environments;
GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
main_pools.ReleaseContents();
std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs;
boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs;
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
if (graphics_key.unique_hashes[index] == 0) {
continue;
}
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
auto& env{graphics_envs[index]};
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
env.SetCachedSize(shader_infos[index]->size_bytes);
envs.push_back(&env);
}
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)};
if (pipeline_cache_filename.empty()) {
return pipeline;
}
serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] {
serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] {
boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
env_ptrs;
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {

View file

@ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) {
return scissor;
}
struct TextureHandle {
constexpr TextureHandle(u32 data, bool via_header_index) {
const Tegra::Texture::TextureHandle handle{data};
image = handle.tic_id;
sampler = via_header_index ? image : handle.tsc_id.Value();
}
u32 image;
u32 sampler;
};
DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced,
bool is_indexed) {
DrawParams params{

View file

@ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() {
return MakeShaderInfo(env, *cpu_shader_addr);
}
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
size_t env_index{};
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
if (unique_hashes[index] == 0) {
continue;
}
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
auto& env{result.envs[index]};
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
env.SetCachedSize(shader_infos[index]->size_bytes);
result.env_ptrs[env_index++] = &env;
}
}
ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
std::scoped_lock lock{lookup_mutex};

View file

@ -4,14 +4,18 @@
#pragma once
#include <algorithm>
#include <array>
#include <memory>
#include <mutex>
#include <span>
#include <unordered_map>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/shader_environment.h"
namespace Tegra {
class MemoryManager;
@ -30,6 +34,8 @@ class ShaderCache {
static constexpr u64 PAGE_BITS = 14;
static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
static constexpr size_t NUM_PROGRAMS = 6;
struct Entry {
VAddr addr_start;
VAddr addr_end;
@ -58,6 +64,15 @@ public:
void SyncGuestHost();
protected:
struct GraphicsEnvironments {
std::array<GraphicsEnvironment, NUM_PROGRAMS> envs;
std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs;
std::span<Shader::Environment* const> Span() const noexcept {
return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr));
}
};
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_);
@ -65,17 +80,21 @@ protected:
/// @brief Update the hashes and information of shader stages
/// @param unique_hashes Shader hashes to store into when a stage is enabled
/// @return True no success, false on error
bool RefreshStages(std::array<u64, 6>& unique_hashes);
bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes);
/// @brief Returns information about the current compute shader
/// @return Pointer to a valid shader, nullptr on error
const ShaderInfo* ComputeShader();
/// @brief Collect the current graphics environments
void GetGraphicsEnvironments(GraphicsEnvironments& result,
const std::array<u64, NUM_PROGRAMS>& unique_hashes);
Tegra::MemoryManager& gpu_memory;
Tegra::Engines::Maxwell3D& maxwell3d;
Tegra::Engines::KeplerCompute& kepler_compute;
std::array<const ShaderInfo*, 6> shader_infos{};
std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
bool last_shaders_valid = false;
private:

View file

@ -187,8 +187,8 @@ std::optional<u64> GenericEnvironment::TryFindSize() {
Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
bool via_header_index, u32 raw) {
const TextureHandle handle{raw, via_header_index};
const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)};
const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)};
Tegra::Texture::TICEntry entry;
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
const Shader::TextureType result{ConvertType(entry)};

View file

@ -29,22 +29,6 @@ class Memorymanager;
namespace VideoCommon {
struct TextureHandle {
explicit TextureHandle(u32 data, bool via_header_index) {
if (via_header_index) {
image = data;
sampler = data;
} else {
const Tegra::Texture::TextureHandle handle{data};
image = handle.tic_id;
sampler = via_header_index ? image : handle.tsc_id.Value();
}
}
u32 image;
u32 sampler;
};
class GenericEnvironment : public Shader::Environment {
public:
explicit GenericEnvironment() = default;

View file

@ -43,7 +43,7 @@ std::string Name(const ImageBase& image) {
return "Invalid";
}
std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) {
std::string Name(const ImageViewBase& image_view) {
const u32 width = image_view.size.width;
const u32 height = image_view.size.height;
const u32 depth = image_view.size.depth;
@ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t
const u32 num_layers = image_view.range.extent.layers;
const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : "";
switch (type.value_or(image_view.type)) {
switch (image_view.type) {
case ImageViewType::e1D:
return fmt::format("ImageView 1D {}{}", width, level);
case ImageViewType::e2D:

View file

@ -255,8 +255,7 @@ struct RenderTargets;
[[nodiscard]] std::string Name(const ImageBase& image);
[[nodiscard]] std::string Name(const ImageViewBase& image_view,
std::optional<ImageViewType> type = std::nullopt);
[[nodiscard]] std::string Name(const ImageViewBase& image_view);
[[nodiscard]] std::string Name(const RenderTargets& render_targets);

View file

@ -154,6 +154,15 @@ union TextureHandle {
};
static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
[[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) {
if (via_header_index) {
return {raw, raw};
} else {
const Tegra::Texture::TextureHandle handle{raw};
return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id};
}
}
struct TICEntry {
union {
struct {

View file

@ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR,
.pNext = nullptr,
.storageBuffer16BitAccess = false,
.storageBuffer16BitAccess = true,
.uniformAndStorageBuffer16BitAccess = true,
.storagePushConstant16 = false,
.storageInputOutput16 = false,