VM_Manager: Align allocated memory to 256bytes

This commit ensures that all backing memory allocated for the Guest CPU
is aligned to 256 bytes. This due to how gpu memory works and the heavy
constraints it has in the alignment of physical memory.
This commit is contained in:
Fernando Sahmkow 2019-07-18 18:15:53 -04:00 committed by FernandoS27
parent 5d369112d9
commit 9bede4eeed
15 changed files with 131 additions and 36 deletions

View file

@ -3,7 +3,10 @@
#pragma once
#include <cstddef>
#include <cstdlib>
#include <type_traits>
#include <malloc.h>
#include <stdlib.h>
namespace Common {
@ -37,4 +40,80 @@ constexpr bool IsWordAligned(T value) {
return (value & 0b11) == 0;
}
template <typename T, std::size_t Align = 16>
class AlignmentAllocator {
public:
typedef T value_type;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
public:
inline AlignmentAllocator() throw() {}
template <typename T2>
inline AlignmentAllocator(const AlignmentAllocator<T2, Align>&) throw() {}
inline ~AlignmentAllocator() throw() {}
inline pointer adress(reference r) {
return &r;
}
inline const_pointer adress(const_reference r) const {
return &r;
}
#if (defined _MSC_VER)
inline pointer allocate(size_type n) {
return (pointer)_aligned_malloc(n * sizeof(value_type), Align);
}
inline void deallocate(pointer p, size_type) {
_aligned_free(p);
}
#else
inline pointer allocate(size_type n) {
return (pointer)std::aligned_alloc(Align, n * sizeof(value_type));
}
inline void deallocate(pointer p, size_type) {
std::free(p);
}
#endif
inline void construct(pointer p, const value_type& wert) {
new (p) value_type(wert);
}
inline void destroy(pointer p) {
p->~value_type();
}
inline size_type max_size() const throw() {
return size_type(-1) / sizeof(value_type);
}
template <typename T2>
struct rebind {
typedef AlignmentAllocator<T2, Align> other;
};
bool operator!=(const AlignmentAllocator<T, Align>& other) const {
return !(*this == other);
}
// Returns true if and only if storage allocated from *this
// can be deallocated from other, and vice versa.
// Always returns true for stateless allocators.
bool operator==(const AlignmentAllocator<T, Align>& other) const {
return true;
}
};
} // namespace Common

View file

@ -8,6 +8,7 @@
#include <vector>
#include "common/common_types.h"
#include "core/hle/kernel/physical_memory.h"
namespace Kernel {
@ -77,7 +78,7 @@ struct CodeSet final {
}
/// The overall data that backs this code set.
std::vector<u8> memory;
Kernel::PhysicalMemory memory;
/// The segments that comprise this code set.
std::array<Segment, 3> segments;

View file

@ -0,0 +1,13 @@
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "common/alignment.h"
namespace Kernel {
using PhysicalMemory = std::vector<u8, Common::AlignmentAllocator<u8, 256>>;
}

View file

@ -247,7 +247,7 @@ VAddr Process::CreateTLSRegion() {
ASSERT(region_address.Succeeded());
const auto map_result = vm_manager.MapMemoryBlock(
*region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
*region_address, std::make_shared<PhysicalMemory>(Memory::PAGE_SIZE), 0,
Memory::PAGE_SIZE, MemoryState::ThreadLocal);
ASSERT(map_result.Succeeded());
@ -277,7 +277,7 @@ void Process::FreeTLSRegion(VAddr tls_address) {
}
void Process::LoadModule(CodeSet module_, VAddr base_addr) {
const auto memory = std::make_shared<std::vector<u8>>(std::move(module_.memory));
const auto memory = std::make_shared<PhysicalMemory>(std::move(module_.memory));
const auto MapSegment = [&](const CodeSet::Segment& segment, VMAPermission permissions,
MemoryState memory_state) {
@ -327,7 +327,7 @@ void Process::AllocateMainThreadStack(u64 stack_size) {
// Allocate and map the main thread stack
const VAddr mapping_address = vm_manager.GetTLSIORegionEndAddress() - main_thread_stack_size;
vm_manager
.MapMemoryBlock(mapping_address, std::make_shared<std::vector<u8>>(main_thread_stack_size),
.MapMemoryBlock(mapping_address, std::make_shared<PhysicalMemory>(main_thread_stack_size),
0, main_thread_stack_size, MemoryState::Stack)
.Unwrap();
}

View file

@ -28,7 +28,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
shared_memory->other_permissions = other_permissions;
if (address == 0) {
shared_memory->backing_block = std::make_shared<std::vector<u8>>(size);
shared_memory->backing_block = std::make_shared<Kernel::PhysicalMemory>(size);
shared_memory->backing_block_offset = 0;
// Refresh the address mappings for the current process.
@ -59,7 +59,7 @@ SharedPtr<SharedMemory> SharedMemory::Create(KernelCore& kernel, Process* owner_
}
SharedPtr<SharedMemory> SharedMemory::CreateForApplet(
KernelCore& kernel, std::shared_ptr<std::vector<u8>> heap_block, std::size_t offset, u64 size,
KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset, u64 size,
MemoryPermission permissions, MemoryPermission other_permissions, std::string name) {
SharedPtr<SharedMemory> shared_memory(new SharedMemory(kernel));

View file

@ -10,6 +10,7 @@
#include "common/common_types.h"
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/physical_memory.h"
#include "core/hle/kernel/process.h"
#include "core/hle/result.h"
@ -62,12 +63,10 @@ public:
* block.
* @param name Optional object name, used for debugging purposes.
*/
static SharedPtr<SharedMemory> CreateForApplet(KernelCore& kernel,
std::shared_ptr<std::vector<u8>> heap_block,
std::size_t offset, u64 size,
MemoryPermission permissions,
MemoryPermission other_permissions,
std::string name = "Unknown Applet");
static SharedPtr<SharedMemory> CreateForApplet(
KernelCore& kernel, std::shared_ptr<Kernel::PhysicalMemory> heap_block, std::size_t offset,
u64 size, MemoryPermission permissions, MemoryPermission other_permissions,
std::string name = "Unknown Applet");
std::string GetTypeName() const override {
return "SharedMemory";
@ -135,7 +134,7 @@ private:
~SharedMemory() override;
/// Backing memory for this shared memory block.
std::shared_ptr<std::vector<u8>> backing_block;
std::shared_ptr<PhysicalMemory> backing_block;
/// Offset into the backing block for this shared memory.
std::size_t backing_block_offset = 0;
/// Size of the memory block. Page-aligned.

View file

@ -47,7 +47,7 @@ ResultCode TransferMemory::MapMemory(VAddr address, u64 size, MemoryPermission p
return ERR_INVALID_STATE;
}
backing_block = std::make_shared<std::vector<u8>>(size);
backing_block = std::make_shared<PhysicalMemory>(size);
const auto map_state = owner_permissions == MemoryPermission::None
? MemoryState::TransferMemoryIsolated

View file

@ -8,6 +8,7 @@
#include <vector>
#include "core/hle/kernel/object.h"
#include "core/hle/kernel/physical_memory.h"
union ResultCode;
@ -82,7 +83,7 @@ private:
~TransferMemory() override;
/// Memory block backing this instance.
std::shared_ptr<std::vector<u8>> backing_block;
std::shared_ptr<PhysicalMemory> backing_block;
/// The base address for the memory managed by this instance.
VAddr base_address = 0;

View file

@ -5,6 +5,7 @@
#include <algorithm>
#include <iterator>
#include <utility>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/memory_hook.h"
@ -103,7 +104,7 @@ bool VMManager::IsValidHandle(VMAHandle handle) const {
}
ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
std::shared_ptr<std::vector<u8>> block,
std::shared_ptr<PhysicalMemory> block,
std::size_t offset, u64 size,
MemoryState state, VMAPermission perm) {
ASSERT(block != nullptr);
@ -260,7 +261,7 @@ ResultVal<VAddr> VMManager::SetHeapSize(u64 size) {
if (heap_memory == nullptr) {
// Initialize heap
heap_memory = std::make_shared<std::vector<u8>>(size);
heap_memory = std::make_shared<PhysicalMemory>(size);
heap_end = heap_region_base + size;
} else {
UnmapRange(heap_region_base, GetCurrentHeapSize());
@ -341,7 +342,7 @@ ResultCode VMManager::MapPhysicalMemory(VAddr target, u64 size) {
const auto map_size = std::min(end_addr - cur_addr, vma_end - cur_addr);
if (vma.state == MemoryState::Unmapped) {
const auto map_res =
MapMemoryBlock(cur_addr, std::make_shared<std::vector<u8>>(map_size, 0), 0,
MapMemoryBlock(cur_addr, std::make_shared<PhysicalMemory>(map_size, 0), 0,
map_size, MemoryState::Heap, VMAPermission::ReadWrite);
result = map_res.Code();
if (result.IsError()) {
@ -442,7 +443,7 @@ ResultCode VMManager::UnmapPhysicalMemory(VAddr target, u64 size) {
if (result.IsError()) {
for (const auto [map_address, map_size] : unmapped_regions) {
const auto remap_res =
MapMemoryBlock(map_address, std::make_shared<std::vector<u8>>(map_size, 0), 0,
MapMemoryBlock(map_address, std::make_shared<PhysicalMemory>(map_size, 0), 0,
map_size, MemoryState::Heap, VMAPermission::None);
ASSERT_MSG(remap_res.Succeeded(), "UnmapPhysicalMemory re-map on error");
}
@ -593,7 +594,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
ASSERT_MSG(vma_offset + size <= vma->second.size,
"Shared memory exceeds bounds of mapped block");
const std::shared_ptr<std::vector<u8>>& backing_block = vma->second.backing_block;
const std::shared_ptr<PhysicalMemory>& backing_block = vma->second.backing_block;
const std::size_t backing_block_offset = vma->second.offset + vma_offset;
CASCADE_RESULT(auto new_vma,
@ -606,7 +607,7 @@ ResultCode VMManager::MirrorMemory(VAddr dst_addr, VAddr src_addr, u64 size, Mem
return RESULT_SUCCESS;
}
void VMManager::RefreshMemoryBlockMappings(const std::vector<u8>* block) {
void VMManager::RefreshMemoryBlockMappings(const PhysicalMemory* block) {
// If this ever proves to have a noticeable performance impact, allow users of the function to
// specify a specific range of addresses to limit the scan to.
for (const auto& p : vma_map) {
@ -764,7 +765,7 @@ void VMManager::MergeAdjacentVMA(VirtualMemoryArea& left, const VirtualMemoryAre
right.backing_block->begin() + right.offset + right.size);
} else {
// Slow case: make a new memory block for left and right.
auto new_memory = std::make_shared<std::vector<u8>>();
auto new_memory = std::make_shared<PhysicalMemory>();
new_memory->insert(new_memory->end(), left.backing_block->begin() + left.offset,
left.backing_block->begin() + left.offset + left.size);
new_memory->insert(new_memory->end(), right.backing_block->begin() + right.offset,

View file

@ -11,6 +11,7 @@
#include "common/common_types.h"
#include "common/memory_hook.h"
#include "common/page_table.h"
#include "core/hle/kernel/physical_memory.h"
#include "core/hle/result.h"
#include "core/memory.h"
@ -290,7 +291,7 @@ struct VirtualMemoryArea {
// Settings for type = AllocatedMemoryBlock
/// Memory block backing this VMA.
std::shared_ptr<std::vector<u8>> backing_block = nullptr;
std::shared_ptr<PhysicalMemory> backing_block = nullptr;
/// Offset into the backing_memory the mapping starts from.
std::size_t offset = 0;
@ -348,7 +349,7 @@ public:
* @param size Size of the mapping.
* @param state MemoryState tag to attach to the VMA.
*/
ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<std::vector<u8>> block,
ResultVal<VMAHandle> MapMemoryBlock(VAddr target, std::shared_ptr<PhysicalMemory> block,
std::size_t offset, u64 size, MemoryState state,
VMAPermission perm = VMAPermission::ReadWrite);
@ -547,7 +548,7 @@ public:
* Scans all VMAs and updates the page table range of any that use the given vector as backing
* memory. This should be called after any operation that causes reallocation of the vector.
*/
void RefreshMemoryBlockMappings(const std::vector<u8>* block);
void RefreshMemoryBlockMappings(const PhysicalMemory* block);
/// Dumps the address space layout to the log, for debugging
void LogLayout() const;
@ -777,7 +778,7 @@ private:
// the entire virtual address space extents that bound the allocations, including any holes.
// This makes deallocation and reallocation of holes fast and keeps process memory contiguous
// in the emulator address space, allowing Memory::GetPointer to be reasonably safe.
std::shared_ptr<std::vector<u8>> heap_memory;
std::shared_ptr<PhysicalMemory> heap_memory;
// The end of the currently allocated heap. This is not an inclusive
// end of the range. This is essentially 'base_address + current_size'.

View file

@ -77,7 +77,7 @@ enum class LoadState : u32 {
Done = 1,
};
static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& output,
static void DecryptSharedFont(const std::vector<u32>& input, Kernel::PhysicalMemory& output,
std::size_t& offset) {
ASSERT_MSG(offset + (input.size() * sizeof(u32)) < SHARED_FONT_MEM_SIZE,
"Shared fonts exceeds 17mb!");
@ -94,7 +94,7 @@ static void DecryptSharedFont(const std::vector<u32>& input, std::vector<u8>& ou
offset += transformed_font.size() * sizeof(u32);
}
static void EncryptSharedFont(const std::vector<u8>& input, std::vector<u8>& output,
static void EncryptSharedFont(const std::vector<u8>& input, Kernel::PhysicalMemory& output,
std::size_t& offset) {
ASSERT_MSG(offset + input.size() + 8 < SHARED_FONT_MEM_SIZE, "Shared fonts exceeds 17mb!");
const u32 KEY = EXPECTED_MAGIC ^ EXPECTED_RESULT;
@ -121,7 +121,7 @@ struct PL_U::Impl {
return shared_font_regions.at(index);
}
void BuildSharedFontsRawRegions(const std::vector<u8>& input) {
void BuildSharedFontsRawRegions(const Kernel::PhysicalMemory& input) {
// As we can derive the xor key we can just populate the offsets
// based on the shared memory dump
unsigned cur_offset = 0;
@ -144,7 +144,7 @@ struct PL_U::Impl {
Kernel::SharedPtr<Kernel::SharedMemory> shared_font_mem;
/// Backing memory for the shared font data
std::shared_ptr<std::vector<u8>> shared_font;
std::shared_ptr<Kernel::PhysicalMemory> shared_font;
// Automatically populated based on shared_fonts dump or system archives.
std::vector<FontRegion> shared_font_regions;
@ -166,7 +166,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
// Rebuild shared fonts from data ncas
if (nand->HasEntry(static_cast<u64>(FontArchives::Standard),
FileSys::ContentRecordType::Data)) {
impl->shared_font = std::make_shared<std::vector<u8>>(SHARED_FONT_MEM_SIZE);
impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(SHARED_FONT_MEM_SIZE);
for (auto font : SHARED_FONTS) {
const auto nca =
nand->GetEntry(static_cast<u64>(font.first), FileSys::ContentRecordType::Data);
@ -207,7 +207,7 @@ PL_U::PL_U() : ServiceFramework("pl:u"), impl{std::make_unique<Impl>()} {
}
} else {
impl->shared_font = std::make_shared<std::vector<u8>>(
impl->shared_font = std::make_shared<Kernel::PhysicalMemory>(
SHARED_FONT_MEM_SIZE); // Shared memory needs to always be allocated and a fixed size
const std::string user_path = FileUtil::GetUserPath(FileUtil::UserPath::SysDataDir);

View file

@ -295,7 +295,7 @@ Kernel::CodeSet ElfReader::LoadInto(VAddr vaddr) {
}
}
std::vector<u8> program_image(total_image_size);
Kernel::PhysicalMemory program_image(total_image_size);
std::size_t current_image_position = 0;
Kernel::CodeSet codeset;

View file

@ -69,7 +69,7 @@ AppLoader::LoadResult AppLoader_KIP::Load(Kernel::Process& process) {
const VAddr base_address = process.VMManager().GetCodeRegionBaseAddress();
Kernel::CodeSet codeset;
std::vector<u8> program_image;
Kernel::PhysicalMemory program_image;
const auto load_segment = [&program_image](Kernel::CodeSet::Segment& segment,
const std::vector<u8>& data, u32 offset) {

View file

@ -143,7 +143,7 @@ static bool LoadNroImpl(Kernel::Process& process, const std::vector<u8>& data,
}
// Build program image
std::vector<u8> program_image(PageAlignSize(nro_header.file_size));
Kernel::PhysicalMemory program_image(PageAlignSize(nro_header.file_size));
std::memcpy(program_image.data(), data.data(), program_image.size());
if (program_image.size() != PageAlignSize(nro_header.file_size)) {
return {};

View file

@ -89,7 +89,7 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::Process& process,
// Build program image
Kernel::CodeSet codeset;
std::vector<u8> program_image;
Kernel::PhysicalMemory program_image;
for (std::size_t i = 0; i < nso_header.segments.size(); ++i) {
std::vector<u8> data =
file.ReadBytes(nso_header.segments_compressed_size[i], nso_header.segments[i].offset);