Commit 3aca4a3490 by Crunch (Chaz9), 2024-09-29 21:31:09 +01:00
Parent commit: 76f6f8de80
4 changed files with 596 additions and 449 deletions

@@ -3,9 +3,18 @@
#include <algorithm>
#include <array>
#include <atomic>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <vector>
#include "common/assert.h"
#include "common/fs/file.h"
#include "common/fs/path_util.h"
#include "common/logging/log.h"
#include "common/thread_worker.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
#include "video_core/control/channel_state.h"
@@ -19,233 +28,288 @@
namespace VideoCommon {
constexpr size_t MAX_SHADER_CACHE_SIZE = 1024 * 1024 * 1024; // 1 GiB; declared here but not referenced elsewhere in this diff
class ShaderCacheWorker : public Common::ThreadWorker {
public:
explicit ShaderCacheWorker(const std::string& name) : ThreadWorker(name) {}
~ShaderCacheWorker() = default;
void CompileShader(ShaderInfo* shader) {
Push([shader]() {
// Placeholder for the actual compilation work; simulate a short
// compile, then publish completion to readers of is_compiled.
std::this_thread::sleep_for(std::chrono::milliseconds(10));
shader->is_compiled.store(true, std::memory_order_release);
});
}
};
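// Illustrative helper, not part of this commit: one plausible way to fan
// compile jobs out across the pool that CreateWorkers() (below) builds.
// The function name and the round-robin counter are hypothetical; only
// CompileShader() and is_compiled come from the code above.
[[maybe_unused]] static void DispatchCompileRoundRobin(
    std::vector<std::unique_ptr<ShaderCacheWorker>>& workers,
    std::atomic_size_t& next_worker, ShaderInfo* shader) {
    // Assumes workers is non-empty; CreateWorkers() guarantees at least one.
    const size_t index =
        next_worker.fetch_add(1, std::memory_order_relaxed) % workers.size();
    workers[index]->CompileShader(shader);
    // Callers can poll shader->is_compiled with memory_order_acquire, pairing
    // with the release store made by the worker thread.
}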
class ShaderCache::Impl {
public:
explicit Impl(Tegra::MaxwellDeviceMemoryManager& device_memory_)
: device_memory{device_memory_}, workers{CreateWorkers()} {
LoadCache();
}
~Impl() {
SaveCache();
}
void InvalidateRegion(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
RemovePendingShaders();
}
void OnCacheInvalidation(VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex};
InvalidatePagesInRegion(addr, size);
}
void SyncGuestHost() {
std::scoped_lock lock{invalidation_mutex};
RemovePendingShaders();
}
bool RefreshStages(std::array<u64, 6>& unique_hashes);
const ShaderInfo* ComputeShader();
void GetGraphicsEnvironments(GraphicsEnvironments& result, const std::array<u64, NUM_PROGRAMS>& unique_hashes);
ShaderInfo* TryGet(VAddr addr) const {
std::scoped_lock lock{lookup_mutex};
const auto it = lookup_cache.find(addr);
if (it == lookup_cache.end()) {
return nullptr;
}
return it->second->data;
}
void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
const VAddr addr_end = addr + size;
Entry* const entry = NewEntry(addr, addr_end, data.get());
const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
invalidation_cache[page].push_back(entry);
}
storage.push_back(std::move(data));
device_memory.UpdatePagesCachedCount(addr, size, 1);
}
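// Worked example of the page-range math used above (and mirrored in
// InvalidatePagesInRegion below), assuming SUYU_PAGEBITS == 12, i.e. 4 KiB
// pages; the real value is defined elsewhere in the tree. A shader at
// addr = 0x1FFE0 with size = 0x40 spans bytes [0x1FFE0, 0x20020):
//   first page = 0x1FFE0 >> 12           = 0x1F
//   page_end   = (0x20020 + 0xFFF) >> 12 = 0x21  (one past the last page)
// so the loop registers the entry under pages 0x1F and 0x20.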
private:
std::vector<std::unique_ptr<ShaderCacheWorker>> CreateWorkers() {
// hardware_concurrency() may return 0 if it cannot be determined; clamp so
// the pool always has at least one worker.
const size_t num_workers = std::max<size_t>(1, std::thread::hardware_concurrency());
std::vector<std::unique_ptr<ShaderCacheWorker>> workers;
workers.reserve(num_workers);
for (size_t i = 0; i < num_workers; ++i) {
workers.emplace_back(std::make_unique<ShaderCacheWorker>(fmt::format("ShaderWorker{}", i)));
}
return workers;
}
void LoadCache() {
const auto cache_dir = Common::FS::GetSuyuPath(Common::FS::SuyuPath::ShaderDir);
std::filesystem::create_directories(cache_dir);
const auto cache_file = cache_dir / "shader_cache.bin";
if (!std::filesystem::exists(cache_file)) {
return;
}
std::ifstream file(cache_file, std::ios::binary);
if (!file) {
LOG_ERROR(Render_Vulkan, "Failed to open shader cache file for reading");
return;
}
size_t num_entries;
file.read(reinterpret_cast<char*>(&num_entries), sizeof(num_entries));
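// Note: num_entries is read from disk and trusted as-is; a truncated or
// corrupt cache file will drive the loop below with a bogus count.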
for (size_t i = 0; i < num_entries; ++i) {
VAddr addr;
size_t size;
file.read(reinterpret_cast<char*>(&addr), sizeof(addr));
file.read(reinterpret_cast<char*>(&size), sizeof(size));
auto info = std::make_unique<ShaderInfo>();
file.read(reinterpret_cast<char*>(info.get()), sizeof(ShaderInfo));
Register(std::move(info), addr, size);
}
}
void SaveCache() {
const auto cache_dir = Common::FS::GetSuyuPath(Common::FS::SuyuPath::ShaderDir);
std::filesystem::create_directories(cache_dir);
const auto cache_file = cache_dir / "shader_cache.bin";
std::ofstream file(cache_file, std::ios::binary | std::ios::trunc);
if (!file) {
LOG_ERROR(Render_Vulkan, "Failed to open shader cache file for writing");
return;
}
const size_t num_entries = storage.size();
file.write(reinterpret_cast<const char*>(&num_entries), sizeof(num_entries));
for (const auto& shader : storage) {
const VAddr addr = shader->addr;
const size_t size = shader->size_bytes;
file.write(reinterpret_cast<const char*>(&addr), sizeof(addr));
file.write(reinterpret_cast<const char*>(&size), sizeof(size));
file.write(reinterpret_cast<const char*>(shader.get()), sizeof(ShaderInfo));
}
}
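// Both LoadCache() and SaveCache() stream ShaderInfo as raw bytes, which only
// round-trips correctly if ShaderInfo is trivially copyable; pointers or
// containers inside it would serialize garbage. A compile-time guard, as a
// sketch: it assumes <type_traits> is available and that ShaderInfo is
// defined in the corresponding header, which is not part of this diff.
static_assert(std::is_trivially_copyable_v<ShaderInfo>,
              "shader_cache.bin serialization assumes a trivially copyable ShaderInfo");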
void InvalidatePagesInRegion(VAddr addr, size_t size) {
const VAddr addr_end = addr + size;
const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
auto it = invalidation_cache.find(page);
if (it == invalidation_cache.end()) {
continue;
}
InvalidatePageEntries(it->second, addr, addr_end);
}
}
void RemovePendingShaders() {
if (marked_for_removal.empty()) {
return;
}
// Remove duplicates
std::sort(marked_for_removal.begin(), marked_for_removal.end());
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
marked_for_removal.end());
std::vector<ShaderInfo*> removed_shaders;
std::scoped_lock lock{lookup_mutex};
for (Entry* const entry : marked_for_removal) {
removed_shaders.push_back(entry->data);
const auto it = lookup_cache.find(entry->addr_start);
ASSERT(it != lookup_cache.end());
lookup_cache.erase(it);
}
marked_for_removal.clear();
if (!removed_shaders.empty()) {
RemoveShadersFromStorage(removed_shaders);
}
}
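// For reference, the sort + unique + erase sequence above is the standard
// duplicate-removal idiom; on a plain vector it reads:
//   std::vector<int> v{3, 1, 3, 2, 1};
//   std::sort(v.begin(), v.end());                     // 1 1 2 3 3
//   v.erase(std::unique(v.begin(), v.end()), v.end()); // 1 2 3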
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
size_t index = 0;
while (index < entries.size()) {
Entry* const entry = entries[index];
if (!entry->Overlaps(addr, addr_end)) {
++index;
continue;
}
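// Intentionally no ++index here: RemoveEntryFromInvalidationCache() below
// also erases `entry` from this page's vector, shifting the next candidate
// into the current slot.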
UnmarkMemory(entry);
RemoveEntryFromInvalidationCache(entry);
marked_for_removal.push_back(entry);
}
}
void RemoveEntryFromInvalidationCache(const Entry* entry) {
const u64 page_end = (entry->addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
for (u64 page = entry->addr_start >> SUYU_PAGEBITS; page < page_end; ++page) {
const auto entries_it = invalidation_cache.find(page);
ASSERT(entries_it != invalidation_cache.end());
std::vector<Entry*>& entries = entries_it->second;
const auto entry_it = std::find(entries.begin(), entries.end(), entry);
ASSERT(entry_it != entries.end());
entries.erase(entry_it);
}
}
void UnmarkMemory(Entry* entry) {
if (!entry->is_memory_marked) {
return;
}
entry->is_memory_marked = false;
const VAddr addr = entry->addr_start;
const size_t size = entry->addr_end - addr;
device_memory.UpdatePagesCachedCount(addr, size, -1);
}
void RemoveShadersFromStorage(const std::vector<ShaderInfo*>& removed_shaders) {
storage.erase(
std::remove_if(storage.begin(), storage.end(),
[&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
return std::find(removed_shaders.begin(), removed_shaders.end(),
shader.get()) != removed_shaders.end();
}),
storage.end());
}
Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
Entry* const entry_pointer = entry.get();
lookup_cache.emplace(addr, std::move(entry));
return entry_pointer;
}
Tegra::MaxwellDeviceMemoryManager& device_memory;
std::vector<std::unique_ptr<ShaderCacheWorker>> workers;
mutable std::mutex lookup_mutex;
std::mutex invalidation_mutex;
std::unordered_map<VAddr, std::unique_ptr<Entry>> lookup_cache;
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
std::vector<std::unique_ptr<ShaderInfo>> storage;
std::vector<Entry*> marked_for_removal;
};
ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_)
: impl{std::make_unique<Impl>(device_memory_)} {}
ShaderCache::~ShaderCache() = default;
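// Defining the defaulted destructor here, after Impl is a complete type, is
// what allows the header to hold a std::unique_ptr<Impl> to a forward-declared
// class. A minimal sketch of the idiom, with hypothetical names:
//
//   // widget.h: Impl is only forward-declared, so the destructor must not be
//   // generated here; unique_ptr needs the complete type to delete it.
//   class Widget {
//   public:
//       Widget();
//       ~Widget(); // declared only
//   private:
//       struct Impl;
//       std::unique_ptr<Impl> impl;
//   };
//
//   // widget.cpp: Impl is complete, so the defaulted destructor compiles.
//   struct Widget::Impl { int state = 0; };
//   Widget::Widget() : impl{std::make_unique<Impl>()} {}
//   Widget::~Widget() = default;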
void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
-    std::scoped_lock lock{invalidation_mutex};
-    InvalidatePagesInRegion(addr, size);
-    RemovePendingShaders();
+    impl->InvalidateRegion(addr, size);
}
void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
-    std::scoped_lock lock{invalidation_mutex};
-    InvalidatePagesInRegion(addr, size);
+    impl->OnCacheInvalidation(addr, size);
}
void ShaderCache::SyncGuestHost() {
-    std::scoped_lock lock{invalidation_mutex};
-    RemovePendingShaders();
+    impl->SyncGuestHost();
}
-ShaderCache::ShaderCache(Tegra::MaxwellDeviceMemoryManager& device_memory_)
-    : device_memory{device_memory_} {}
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
-    auto& dirty{maxwell3d->dirty.flags};
-    if (!dirty[VideoCommon::Dirty::Shaders]) {
-        return last_shaders_valid;
-    }
-    dirty[VideoCommon::Dirty::Shaders] = false;
-    const GPUVAddr base_addr{maxwell3d->regs.program_region.Address()};
-    for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
-        if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
-            unique_hashes[index] = 0;
-            continue;
-        }
-        const auto& shader_config{maxwell3d->regs.pipelines[index]};
-        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)};
-        if (program == Tegra::Engines::Maxwell3D::Regs::ShaderType::Pixel &&
-            !maxwell3d->regs.rasterize_enable) {
-            unique_hashes[index] = 0;
-            continue;
-        }
-        const GPUVAddr shader_addr{base_addr + shader_config.offset};
-        const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
-        if (!cpu_shader_addr) {
-            LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-            last_shaders_valid = false;
-            return false;
-        }
-        const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
-        if (!shader_info) {
-            const u32 start_address{shader_config.offset};
-            GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
-            shader_info = MakeShaderInfo(env, *cpu_shader_addr);
-        }
-        shader_infos[index] = shader_info;
-        unique_hashes[index] = shader_info->unique_hash;
-    }
-    last_shaders_valid = true;
-    return true;
+    return impl->RefreshStages(unique_hashes);
}
const ShaderInfo* ShaderCache::ComputeShader() {
-    const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
-    const auto& qmd{kepler_compute->launch_description};
-    const GPUVAddr shader_addr{program_base + qmd.program_start};
-    const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
-    if (!cpu_shader_addr) {
-        LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
-        return nullptr;
-    }
-    if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
-        return shader;
-    }
-    ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
-    return MakeShaderInfo(env, *cpu_shader_addr);
+    return impl->ComputeShader();
}
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
                                          const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
-    size_t env_index{};
-    const GPUVAddr base_addr{maxwell3d->regs.program_region.Address()};
-    for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
-        if (unique_hashes[index] == 0) {
-            continue;
-        }
-        const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderType>(index)};
-        auto& env{result.envs[index]};
-        const u32 start_address{maxwell3d->regs.pipelines[index].offset};
-        env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
-        env.SetCachedSize(shader_infos[index]->size_bytes);
-        result.env_ptrs[env_index++] = &env;
-    }
+    impl->GetGraphicsEnvironments(result, unique_hashes);
}
ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
-    std::scoped_lock lock{lookup_mutex};
-    const auto it = lookup_cache.find(addr);
-    if (it == lookup_cache.end()) {
-        return nullptr;
-    }
-    return it->second->data;
+    return impl->TryGet(addr);
}
void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
-    std::scoped_lock lock{invalidation_mutex, lookup_mutex};
-    const VAddr addr_end = addr + size;
-    Entry* const entry = NewEntry(addr, addr_end, data.get());
-    const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
-    for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
-        invalidation_cache[page].push_back(entry);
-    }
-    storage.push_back(std::move(data));
-    device_memory.UpdatePagesCachedCount(addr, size, 1);
+    impl->Register(std::move(data), addr, size);
}
-void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
-    const VAddr addr_end = addr + size;
-    const u64 page_end = (addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
-    for (u64 page = addr >> SUYU_PAGEBITS; page < page_end; ++page) {
-        auto it = invalidation_cache.find(page);
-        if (it == invalidation_cache.end()) {
-            continue;
-        }
-        InvalidatePageEntries(it->second, addr, addr_end);
-    }
-}
-void ShaderCache::RemovePendingShaders() {
-    if (marked_for_removal.empty()) {
-        return;
-    }
-    // Remove duplicates
-    std::ranges::sort(marked_for_removal);
-    marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
-                             marked_for_removal.end());
-    boost::container::small_vector<ShaderInfo*, 16> removed_shaders;
-    std::scoped_lock lock{lookup_mutex};
-    for (Entry* const entry : marked_for_removal) {
-        removed_shaders.push_back(entry->data);
-        const auto it = lookup_cache.find(entry->addr_start);
-        ASSERT(it != lookup_cache.end());
-        lookup_cache.erase(it);
-    }
-    marked_for_removal.clear();
-    if (!removed_shaders.empty()) {
-        RemoveShadersFromStorage(removed_shaders);
-    }
-}
-void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
-    size_t index = 0;
-    while (index < entries.size()) {
-        Entry* const entry = entries[index];
-        if (!entry->Overlaps(addr, addr_end)) {
-            ++index;
-            continue;
-        }
-        UnmarkMemory(entry);
-        RemoveEntryFromInvalidationCache(entry);
-        marked_for_removal.push_back(entry);
-    }
-}
-void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
-    const u64 page_end = (entry->addr_end + SUYU_PAGESIZE - 1) >> SUYU_PAGEBITS;
-    for (u64 page = entry->addr_start >> SUYU_PAGEBITS; page < page_end; ++page) {
-        const auto entries_it = invalidation_cache.find(page);
-        ASSERT(entries_it != invalidation_cache.end());
-        std::vector<Entry*>& entries = entries_it->second;
-        const auto entry_it = std::ranges::find(entries, entry);
-        ASSERT(entry_it != entries.end());
-        entries.erase(entry_it);
-    }
-}
-void ShaderCache::UnmarkMemory(Entry* entry) {
-    if (!entry->is_memory_marked) {
-        return;
-    }
-    entry->is_memory_marked = false;
-    const VAddr addr = entry->addr_start;
-    const size_t size = entry->addr_end - addr;
-    device_memory.UpdatePagesCachedCount(addr, size, -1);
-}
-void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
-    // Remove them from the cache
-    std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
-        return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
-    });
-}
-ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
-    auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
-    Entry* const entry_pointer = entry.get();
-    lookup_cache.emplace(addr, std::move(entry));
-    return entry_pointer;
-}
-const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
-    auto info = std::make_unique<ShaderInfo>();
-    if (const std::optional<u64> cached_hash{env.Analyze()}) {
-        info->unique_hash = *cached_hash;
-        info->size_bytes = env.CachedSizeBytes();
-    } else {
-        // Slow path, not really hit on commercial games
-        // Build a control flow graph to get the real shader size
-        Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
-        Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
-        info->unique_hash = env.CalculateHash();
-        info->size_bytes = env.ReadSizeBytes();
-    }
-    const size_t size_bytes{info->size_bytes};
-    const ShaderInfo* const result{info.get()};
-    Register(std::move(info), cpu_addr, size_bytes);
-    return result;
-}
} // namespace VideoCommon