Commit 3aca4a3490 by Crunch (Chaz9), 2024-09-29 21:31:09 +01:00
Parent commit: 76f6f8de80
4 changed files with 596 additions and 449 deletions

@@ -3,9 +3,18 @@
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/fs/file.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "common/thread_worker.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
#include "video_core/control/channel_state.h"
@@ -19,233 +28,288 @@
namespace VideoCommon {
constexpr size_t MAX_SHADER_CACHE_SIZE = 1024 * 1024 * 1024; // 1 GiB; declared here but not referenced elsewhere in this diff
class ShaderCacheWorker : public Common::ThreadWorker {
public:
explicit ShaderCacheWorker(const std::string& name) : ThreadWorker(name) {}
~ShaderCacheWorker() = default;
void CompileShader(ShaderInfo* shader) {
Push([shader]() {
// Placeholder for the actual compilation work; simulate a short
// compile, then publish completion to readers of is_compiled.
std::this_thread::sleep_for(std::chrono::milliseconds(10));
shader->is_compiled.store(true, std::memory_order_release);
});
}
};
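// Illustrative helper, not part of this commit: one plausible way to fan
// compile jobs out across the pool that CreateWorkers() (below) builds.
// The function name and the round-robin counter are hypothetical; only
// CompileShader() and is_compiled come from the code above.
[[maybe_unused]] static void DispatchCompileRoundRobin(
    std::vector<std::unique_ptr<ShaderCacheWorker>>& workers,
    std::atomic_size_t& next_worker, ShaderInfo* shader) {
    // Assumes workers is non-empty; CreateWorkers() guarantees at least one.
    const size_t index =
        next_worker.fetch_add(1, std::memory_order_relaxed) % workers.size();
    workers[index]->CompileShader(shader);
    // Callers can poll shader->is_compiled with memory_order_acquire, pairing
    // with the release store made by the worker thread.
}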
class ShaderCache::Impl {
public:
explicit Impl(Tegra::MaxwellDeviceMemoryManager& device_memory_)
: device_memory{device_memory_}, workers{CreateWorkers()} {
LoadCache();
}
~Impl() {
SaveCache();
}
void InvalidateRegion(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
RemovePendingShaders();
}
void OnCacheInvalidation(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
}
void SyncGuestHost() {
std::scoped_lock lock{invalidation_mutex};
RemovePendingShaders();
}
bool RefreshStages(std::array<u64, 6>& unique_hashes);
const ShaderInfo* ComputeShader();
void GetGraphicsEnvironments(GraphicsEnvironments& result, const std::array<u64, NUM_PROGRAMS>& unique_hashes);
ShaderInfo* TryGet(VAddr addr) const {
std::scoped_lock lock{lookup_mutex};
const auto it = lookup_cache.find(addr);
if (it == lookup_cache.end()) {
return nullptr;
}
return it->second->data;
}
void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
const VAddr addr_end = addr + size;
Entry* const entry = NewEntry(addr, addr_end, data.get());
const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
invalidation_cache[page].push_back(entry);
}
storage.push_back(std::move(data));
device_memory.UpdatePagesCachedCount(addr, size, 1);
}
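// Worked example of the page-range math used above (and mirrored in
// InvalidatePagesInRegion below), assuming SUYU_PAGEBITS == 12, i.e. 4 KiB
// pages; the real value is defined elsewhere in the tree. A shader at
// addr = 0x1FFE0 with size = 0x40 spans bytes [0x1FFE0, 0x20020):
//   first page = 0x1FFE0 >> 12           = 0x1F
//   page_end   = (0x20020 + 0xFFF) >> 12 = 0x21  (one past the last page)
// so the loop registers the entry under pages 0x1F and 0x20.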
private:
std::vector<std::unique_ptr<ShaderCacheWorker>> CreateWorkers() {
// hardware_concurrency() may return 0 if it cannot be determined; clamp so
// the pool always has at least one worker.
const size_t num_workers = std::max<size_t>(1, std::thread::hardware_concurrency());
std::vector<std::unique_ptr<ShaderCacheWorker>> workers;
workers.reserve(num_workers);
for (size_t i = 0; i < num_workers; ++i) {
workers.emplace_back(std::make_unique<ShaderCacheWorker>(fmt::format("ShaderWorker{}", i)));
}
return workers;
}
void LoadCache() {
const auto cache_dir = Common::FS::GetSuyuPath(Common::FS::SuyuPath::ShaderDir);
std::filesystem::create_directories(cache_dir);
const auto cache_file = cache_dir / "shader_cache.bin";
if (!std::filesystem::exists(cache_file)) {
return;
}
std::ifstream file(cache_file, std::ios::binary);
if (!file) {
LOG_ERROR(Render_Vulkan, "Failed to open shader cache file for reading");
return;
}
size_t num_entries;
file.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
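// Note: num_entries is read from disk and trusted as-is; a truncated or
// corrupt cache file will drive the loop below with a bogus count.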
for (size_t i = 0; i < num_entries; ++i) {
VAddr addr;
size_t size;
file.read(reinterpret_cast<char*>(&addr), sizeof(addr));
file.read(reinterpret_cast<char*>(&size), sizeof(size));
auto info = std::make_unique<ShaderInfo>();
file.read(reinterpret_cast<char*>(info.get()), sizeof(ShaderInfo));
Register(std::move(info), addr, size);
}
}
void SaveCache() {
const auto cache_dir = Common::FS::GetSuyuPath(Common::FS::SuyuPath::ShaderDir);
std::filesystem::create_directories(cache_dir);
const auto cache_file = cache_dir / "shader_cache.bin";
std::ofstream file(cache_file, std::ios::binary | std::ios::trunc);
if (!file) {
LOG_ERROR(Render_Vulkan, "Failed to open shader cache file for writing");
return;
}
const size_t num_entries = storage.size();
file.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
for (const auto& shader : storage) {
const VAddr addr = shader->addr;
const size_t size = shader->size_bytes;
file.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
file.write(reinterpret_cast<const char*>(&size), sizeof(size));
file.write(reinterpret_cast<const char*>(shader.get()), sizeof(ShaderInfo));
}
}
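// Both LoadCache() and SaveCache() stream ShaderInfo as raw bytes, which only
// round-trips correctly if ShaderInfo is trivially copyable; pointers or
// containers inside it would serialize garbage. A compile-time guard, as a
// sketch: it assumes <type_traits> is available and that ShaderInfo is
// defined in the corresponding header, which is not part of this diff.
static_assert(std::is_trivially_copyable_v<ShaderInfo>,
              "shader_cache.bin serialization assumes a trivially copyable ShaderInfo");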
void InvalidatePagesInRegion(VAddr addr, size_t size) {
const VAddr addr_end = addr + size;
const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
auto it = invalidation_cache.find(page);
if (it == invalidation_cache.end()) {
continue;
}
InvalidatePageEntries(it->second, addr, addr_end);
}
}
void RemovePendingShaders() {
if (marked_for_removal.empty()) {
return;
}
// Remove duplicates
std::sort(marked_for_removal.begin(), marked_for_removal.end());
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
marked_for_removal.end());
std::vector<ShaderInfo*> removed_shaders;
std::scoped_lock lock{lookup_mutex};
for (Entry* const entry : marked_for_removal) {
removed_shaders.push_back(entry->data);
const auto it = lookup_cache.find(entry->addr_start);
ASSERT(it != lookup_cache.end());
lookup_cache.erase(it);
}
marked_for_removal.clear();
if (!removed_shaders.empty()) {
RemoveShadersFromStorage(removed_shaders);
}
}
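// For reference, the sort + unique + erase sequence above is the standard
// duplicate-removal idiom; on a plain vector it reads:
//   std::vector<int> v{3, 1, 3, 2, 1};
//   std::sort(v.begin(), v.end());                     // 1 1 2 3 3
//   v.erase(std::unique(v.begin(), v.end()), v.end()); // 1 2 3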
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
size_t index = 0;
while (index < entries.size()) {
Entry* const entry = entries[index];
if (!entry->Overlaps(addr, addr_end)) {
++index;
continue;
}
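// Intentionally no ++index here: RemoveEntryFromInvalidationCache() below
// also erases `entry` from this page's vector, shifting the next candidate
// into the current slot.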
UnmarkMemory(entry);
RemoveEntryFromInvalidationCache(entry);
marked_for_removal.push_back(entry);
}
}
void RemoveEntryFromInvalidationCache(const Entry* entry) {
const u64 page_end = (entry->addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
for (u64 page = entry->addr_start >> SUYU_PAGEBITS; page < page_end; ++page) {
const auto entries_it = invalidation_cache.find(page);
ASSERT(entries_it != invalidation_cache.end());
std::vector<Entry*>& entries = entries_it->second;
const auto entry_it = std::find(entries.begin(), entries.end(), entry);
ASSERT(entry_it != entries.end());
entries.erase(entry_it);
}
}
void UnmarkMemory(Entry* entry) {
if (!entry->is_memory_marked) {
return;
}
entry->is_memory_marked = false;
const VAddr addr = entry->addr_start;
const size_t size = entry->addr_end - addr;
device_memory.UpdatePagesCachedCount(addr, size, -1);
}
void RemoveShadersFromStorage(const std::vector<ShaderInfo*>& removed_shaders) {
storage.erase(
std::remove_if(storage.begin(), storage.end(),
[&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
return std::find(removed_shaders.begin(), removed_shaders.end(),
shader.get()) != removed_shaders.end();
}),
storage.end());
}
Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
Entry* const entry_pointer = entry.get();
lookup_cache.emplace(addr, std::move(entry));
return entry_pointer;
}
Tegra::MaxwellDeviceMemoryManager& device_memory;
std::vector<std::unique_ptr<ShaderCacheWorker>> workers;
mutable std::mutex lookup_mutex;
std::mutex invalidation_mutex;
std::unordered_map<VAddr, std::unique_ptr<Entry>> lookup_cache;
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
std::vector<std::unique_ptr<ShaderInfo>> storage;
std::vector<Entry*> marked_for_removal;
};
ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_)
: impl{std::make_unique<Impl>(device_memory_)} {}
ShaderCache::~ShaderCache() = default;
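// Defining the defaulted destructor here, after Impl is a complete type, is
// what allows the header to hold a std::unique_ptr<Impl> to a forward-declared
// class. A minimal sketch of the idiom, with hypothetical names:
//
//   // widget.h: Impl is only forward-declared, so the destructor must not be
//   // generated here; unique_ptr needs the complete type to delete it.
//   class Widget {
//   public:
//       Widget();
//       ~Widget(); // declared only
//   private:
//       struct Impl;
//       std::unique_ptr<Impl> impl;
//   };
//
//   // widget.cpp: Impl is complete, so the defaulted destructor compiles.
//   struct Widget::Impl { int state = 0; };
//   Widget::Widget() : impl{std::make_unique<Impl>()} {}
//   Widget::~Widget() = default;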
void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
-    std::scoped_lock lock{invalidation_mutex};
-    InvalidatePagesInRegion(addr, size);
-    RemovePendingShaders();
+    impl->InvalidateRegion(addr, size);
}
void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
-    std::scoped_lock lock{invalidation_mutex};
-    InvalidatePagesInRegion(addr, size);
+    impl->OnCacheInvalidation(addr, size);
}
void ShaderCache::SyncGuestHost() {
-    std::scoped_lock lock{invalidation_mutex};
-    RemovePendingShaders();
+    impl->SyncGuestHost();
}
-ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_)
-    : device_memory{device_memory_} {}
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
-    auto& dirty{maxwell3d->dirty.flags};
-    if (!dirty[VideoCommon::Dirty::Shaders]) {
-        return last_shaders_valid;
-    }
-    dirty[VideoCommon::Dirty::Shaders] = false;
-    const GPUVAddr base_addr{maxwell3d->regs.program_region.Address()};
-    for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
-        if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
-            unique_hashes[index] = 0;
-            continue;
-        }
-        const auto& shader_config{maxwell3d->regs.pipelines[index]};
-        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)};
-        if (program == Tegra::Engines::Maxwell3D::Regs::ShaderType::Pixel &&
-            !maxwell3d->regs.rasterize_enable) {
-            unique_hashes[index] = 0;
-            continue;
-        }
-        const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
-        if (!cpu_shader_addr) {
-            LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-            last_shaders_valid = false;
-            return false;
-        }
-        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
-        if (!shader_info) {
-            const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
-            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
-        }
-        shader_infos[index] = shader_info;
-        unique_hashes[index] = shader_info->unique_hash;
-    }
-    last_shaders_valid = true;
-    return true;
+    return impl->RefreshStages(unique_hashes);
}
const ShaderInfo* ShaderCache::ComputeShader() {
-    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
-    const auto& qmd{kepler_compute->launch_description};
-    const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
-    if (!cpu_shader_addr) {
-        LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-        return nullptr;
-    }
-    if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
-        return shader;
-    }
-    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
-    return MakeShaderInfo(env, *cpu_shader_addr);
+    return impl->ComputeShader();
}
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
                                          const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
-    size_t env_index{};
-    const GPUVAddr base_addr{maxwell3d->regs.program_region.Address()};
-    for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
-        if (unique_hashes[index] == 0) {
-            continue;
-        }
-        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)};
-        auto& env{result.envs[index]};
-        const u32 start_address{maxwell3d->regs.pipelines[index].offset};
-        env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
-        env.SetCachedSize(shader_infos[index]->size_bytes);
-        result.env_ptrs[env_index++] = &env;
-    }
+    impl->GetGraphicsEnvironments(result, unique_hashes);
}
ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
-    std::scoped_lock lock{lookup_mutex};
-    const auto it = lookup_cache.find(addr);
-    if (it == lookup_cache.end()) {
-        return nullptr;
-    }
-    return it->second->data;
+    return impl->TryGet(addr);
}
void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
-    std::scoped_lock lock{invalidation_mutex, lookup_mutex};
-    const VAddr addr_end = addr + size;
-    Entry* const entry = NewEntry(addr, addr_end, data.get());
-    const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
-    for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
-        invalidation_cache[page].push_back(entry);
-    }
-    storage.push_back(std::move(data));
-    device_memory.UpdatePagesCachedCount(addr, size, 1);
+    impl->Register(std::move(data), addr, size);
}
-void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
-    const VAddr addr_end = addr + size;
-    const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
-    for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
-        auto it = invalidation_cache.find(page);
-        if (it == invalidation_cache.end()) {
-            continue;
-        }
-        InvalidatePageEntries(it->second, addr, addr_end);
-    }
-}
-void ShaderCache::RemovePendingShaders() {
-    if (marked_for_removal.empty()) {
-        return;
-    }
-    // Remove duplicates
-    std::ranges::sort(marked_for_removal);
-    marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
-                             marked_for_removal.end());
-    boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
-    std::scoped_lock lock{lookup_mutex};
-    for (Entry* const entry : marked_for_removal) {
-        removed_shaders.push_back(entry->data);
-        const auto it = lookup_cache.find(entry->addr_start);
-        ASSERT(it != lookup_cache.end());
-        lookup_cache.erase(it);
-    }
-    marked_for_removal.clear();
-    if (!removed_shaders.empty()) {
-        RemoveShadersFromStorage(removed_shaders);
-    }
-}
-void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
-    size_t index = 0;
-    while (index < entries.size()) {
-        Entry* const entry = entries[index];
-        if (!entry->Overlaps(addr, addr_end)) {
-            ++index;
-            continue;
-        }
-        UnmarkMemory(entry);
-        RemoveEntryFromInvalidationCache(entry);
-        marked_for_removal.push_back(entry);
-    }
-}
-void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
-    const u64 page_end = (entry->addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
-    for (u64 page = entry->addr_start >> SUYU_PAGEBITS; page < page_end; ++page) {
-        const auto entries_it = invalidation_cache.find(page);
-        ASSERT(entries_it != invalidation_cache.end());
-        std::vector<Entry*>& entries = entries_it->second;
-        const auto entry_it = std::ranges::find(entries, entry);
-        ASSERT(entry_it != entries.end());
-        entries.erase(entry_it);
-    }
-}
-void ShaderCache::UnmarkMemory(Entry* entry) {
-    if (!entry->is_memory_marked) {
-        return;
-    }
-    entry->is_memory_marked = false;
-    const VAddr addr = entry->addr_start;
-    const size_t size = entry->addr_end - addr;
-    device_memory.UpdatePagesCachedCount(addr, size, -1);
-}
-void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
-    // Remove them from the cache
-    std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
-        return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
-    });
-}
-ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
-    auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
-    Entry* const entry_pointer = entry.get();
-    lookup_cache.emplace(addr, std::move(entry));
-    return entry_pointer;
-}
-const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
-    auto info = std::make_unique<ShaderInfo>();
-    if (const std::optional<u64> cached_hash{env.Analyze()}) {
-        info->unique_hash = *cached_hash;
-        info->size_bytes = env.CachedSizeBytes();
-    } else {
-        // Slow path, not really hit on commercial games
-        // Build a control flow graph to get the real shader size
-        Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
-        Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
-        info->unique_hash = env.CalculateHash();
-        info->size_bytes = env.ReadSizeBytes();
-    }
-    const size_t size_bytes{info->size_bytes};
-    const ShaderInfo* const result{info.get()};
-    Register(std::move(info), cpu_addr, size_bytes);
-    return result;
-}
} // namespace VideoCommon