From 26d42ff6e143042aad5fca6bf109c0b7c0616ba9 Mon Sep 17 00:00:00 2001 From: csmartdalton Date: Thu, 29 Aug 2024 01:30:18 +0000 Subject: [PATCH] Support Vulkan on Android Since atomic support isn't guaranteed on Vulkan, add a new PlatformFeature called 'supportsFragmentShaderAtomics'. Various bugfixes. Download and package Vulkan validation layers with the android testing apk. Start an android Vulkan runner that doesn't do diffing yet. Diffs= cc903ee02 Support Vulkan on Android (#7915) Co-authored-by: Chris Dalton <99840794+csmartdalton@users.noreply.github.com> --- .rive_head | 2 +- .../renderer/gl/render_context_gl_impl.hpp | 1 + renderer/include/rive/renderer/gpu.hpp | 4 ++-- .../include/rive/renderer/vulkan/vkutil.hpp | 7 +++++++ .../rive_vk_bootstrap/bootstrap_project.lua | 2 +- .../rive_vk_bootstrap/vulkan_fence_pool.hpp | 5 ++++- renderer/src/d3d/render_context_d3d_impl.cpp | 1 + renderer/src/gl/pls_impl_ext_native.cpp | 4 ++++ renderer/src/gl/pls_impl_framebuffer_fetch.cpp | 7 +++++++ renderer/src/gl/pls_impl_rw_texture.cpp | 5 +++++ renderer/src/gl/pls_impl_webgl.cpp | 5 +++++ renderer/src/gl/render_context_gl_impl.cpp | 10 +++++++--- .../src/metal/render_context_metal_impl.mm | 3 +++ renderer/src/render_context.cpp | 11 ++++++++--- renderer/src/shaders/atomic_draw.glsl | 1 - renderer/src/shaders/glsl.glsl | 2 +- .../src/vulkan/render_context_vulkan_impl.cpp | 18 +++++++++++++++--- renderer/src/vulkan/vkutil.cpp | 1 - 18 files changed, 72 insertions(+), 17 deletions(-) diff --git a/.rive_head b/.rive_head index cb604e1a..5f7b8b1f 100644 --- a/.rive_head +++ b/.rive_head @@ -1 +1 @@ -c7bb5eb5ae34e300171bc6e48e26216170b094c7 +cc903ee0236a43b850488e029f270e5da23625b7 diff --git a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp index bf70964f..1409cd2f 100644 --- a/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp +++ 
b/renderer/include/rive/renderer/gl/render_context_gl_impl.hpp @@ -68,6 +68,7 @@ class RenderContextGLImpl : public RenderContextHelperImpl virtual void init(rcp) {} virtual bool supportsRasterOrdering(const GLCapabilities&) const = 0; + virtual bool supportsFragmentShaderAtomics(const GLCapabilities&) const = 0; virtual void activatePixelLocalStorage(RenderContextGLImpl*, const FlushDescriptor&) = 0; virtual void deactivatePixelLocalStorage(RenderContextGLImpl*, const FlushDescriptor&) = 0; diff --git a/renderer/include/rive/renderer/gpu.hpp b/renderer/include/rive/renderer/gpu.hpp index 5a310472..3f586db3 100644 --- a/renderer/include/rive/renderer/gpu.hpp +++ b/renderer/include/rive/renderer/gpu.hpp @@ -93,8 +93,8 @@ constexpr static uint32_t kGradTextureWidthInSimpleRamps = kGradTextureWidth / 2 // Backend-specific capabilities/workarounds and fine tuning. struct PlatformFeatures { - bool supportsPixelLocalStorage = true; - bool supportsRasterOrdering = true; // Can pixel local storage accesses be raster ordered? + bool supportsRasterOrdering = false; // InterlockMode::rasterOrdering. + bool supportsFragmentShaderAtomics = false; // InterlockMode::atomics. bool supportsKHRBlendEquations = false; // Use KHR_blend_equation_advanced in depthStencil mode? bool supportsClipPlanes = false; // Required for @ENABLE_CLIP_RECT in depthStencil mode. 
bool supportsBindlessTextures = false; diff --git a/renderer/include/rive/renderer/vulkan/vkutil.hpp b/renderer/include/rive/renderer/vulkan/vkutil.hpp index ab2cf7ae..69c22e54 100644 --- a/renderer/include/rive/renderer/vulkan/vkutil.hpp +++ b/renderer/include/rive/renderer/vulkan/vkutil.hpp @@ -30,6 +30,13 @@ inline static void vk_check(VkResult res, const char* file, int line) #define VK_CHECK(x) ::rive::gpu::vkutil::vk_check(x, __FILE__, __LINE__) +constexpr static uint32_t kVendorAMD = 0x1002; +constexpr static uint32_t kVendorImgTec = 0x1010; +constexpr static uint32_t kVendorNVIDIA = 0x10DE; +constexpr static uint32_t kVendorARM = 0x13B5; +constexpr static uint32_t kVendorQualcomm = 0x5143; +constexpr static uint32_t kVendorINTEL = 0x8086; + constexpr static VkColorComponentFlags kColorWriteMaskRGBA = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT; diff --git a/renderer/rive_vk_bootstrap/bootstrap_project.lua b/renderer/rive_vk_bootstrap/bootstrap_project.lua index b83def46..c938c64d 100644 --- a/renderer/rive_vk_bootstrap/bootstrap_project.lua +++ b/renderer/rive_vk_bootstrap/bootstrap_project.lua @@ -8,7 +8,7 @@ if not vulkan_headers or not vulkan_memory_allocator then end local dependency = require('dependency') -vk_bootstrap = dependency.github('charles-lunarg/vk-bootstrap', 'v1.3.292') +vk_bootstrap = dependency.github('charles-lunarg/vk-bootstrap', '30a13b2') includedirs({ 'include' }) diff --git a/renderer/rive_vk_bootstrap/include/rive_vk_bootstrap/vulkan_fence_pool.hpp b/renderer/rive_vk_bootstrap/include/rive_vk_bootstrap/vulkan_fence_pool.hpp index c80553d2..f2517e77 100644 --- a/renderer/rive_vk_bootstrap/include/rive_vk_bootstrap/vulkan_fence_pool.hpp +++ b/renderer/rive_vk_bootstrap/include/rive_vk_bootstrap/vulkan_fence_pool.hpp @@ -44,7 +44,10 @@ class VulkanFence : public CommandBufferCompletionFence void wait() override { - m_vk->WaitForFences(m_vk->device, 1, &m_vkFence, VK_TRUE, 
VK_WHOLE_SIZE); + while (m_vk->WaitForFences(m_vk->device, 1, &m_vkFence, VK_TRUE, 1000) == VK_TIMEOUT) + { + // Keep waiting. + } } private: diff --git a/renderer/src/d3d/render_context_d3d_impl.cpp b/renderer/src/d3d/render_context_d3d_impl.cpp index ab06c5e9..6e9fb80c 100644 --- a/renderer/src/d3d/render_context_d3d_impl.cpp +++ b/renderer/src/d3d/render_context_d3d_impl.cpp @@ -144,6 +144,7 @@ RenderContextD3DImpl::RenderContextD3DImpl(ComPtr gpu, { m_platformFeatures.invertOffscreenY = true; m_platformFeatures.supportsRasterOrdering = d3dCapabilities.supportsRasterizerOrderedViews; + m_platformFeatures.supportsFragmentShaderAtomics = true; // Create a default raster state for path and offscreen draws. D3D11_RASTERIZER_DESC rasterDesc; diff --git a/renderer/src/gl/pls_impl_ext_native.cpp b/renderer/src/gl/pls_impl_ext_native.cpp index 9114c55a..ad0e2c88 100644 --- a/renderer/src/gl/pls_impl_ext_native.cpp +++ b/renderer/src/gl/pls_impl_ext_native.cpp @@ -81,6 +81,10 @@ class RenderContextGLImpl::PLSImplEXTNative : public RenderContextGLImpl::PixelL void init(rcp state) override { m_state = std::move(state); } bool supportsRasterOrdering(const GLCapabilities&) const override { return true; } + bool supportsFragmentShaderAtomics(const GLCapabilities& capabilities) const override + { + return false; + } void activatePixelLocalStorage(RenderContextGLImpl* impl, const FlushDescriptor& desc) override { diff --git a/renderer/src/gl/pls_impl_framebuffer_fetch.cpp b/renderer/src/gl/pls_impl_framebuffer_fetch.cpp index 2b53080a..b5c02c40 100644 --- a/renderer/src/gl/pls_impl_framebuffer_fetch.cpp +++ b/renderer/src/gl/pls_impl_framebuffer_fetch.cpp @@ -27,6 +27,13 @@ class RenderContextGLImpl::PLSImplFramebufferFetch bool supportsRasterOrdering(const GLCapabilities&) const override { return true; } + bool supportsFragmentShaderAtomics(const GLCapabilities& capabilities) const override + { + // It only makes sense to use atomic mode if the driver allows us to turn + // 
raster ordering OFF with framebuffer fetches. + return capabilities.QCOM_shader_framebuffer_fetch_noncoherent; + } + void activatePixelLocalStorage(RenderContextGLImpl* plsContextImpl, const FlushDescriptor& desc) override { diff --git a/renderer/src/gl/pls_impl_rw_texture.cpp b/renderer/src/gl/pls_impl_rw_texture.cpp index 4938c935..b9f6ac88 100644 --- a/renderer/src/gl/pls_impl_rw_texture.cpp +++ b/renderer/src/gl/pls_impl_rw_texture.cpp @@ -36,6 +36,11 @@ class RenderContextGLImpl::PLSImplRWTexture : public RenderContextGLImpl::PixelL capabilities.INTEL_fragment_shader_ordering; } + bool supportsFragmentShaderAtomics(const GLCapabilities& capabilities) const override + { + return true; + } + void activatePixelLocalStorage(RenderContextGLImpl* plsContextImpl, const FlushDescriptor& desc) override { diff --git a/renderer/src/gl/pls_impl_webgl.cpp b/renderer/src/gl/pls_impl_webgl.cpp index 99a330a0..6a72c234 100644 --- a/renderer/src/gl/pls_impl_webgl.cpp +++ b/renderer/src/gl/pls_impl_webgl.cpp @@ -167,6 +167,11 @@ class RenderContextGLImpl::PLSImplWebGL : public RenderContextGLImpl::PixelLocal return capabilities.ANGLE_shader_pixel_local_storage_coherent; } + bool supportsFragmentShaderAtomics(const GLCapabilities& capabilities) const override + { + return false; + } + void activatePixelLocalStorage(RenderContextGLImpl* plsContextImpl, const FlushDescriptor& desc) override { diff --git a/renderer/src/gl/render_context_gl_impl.cpp b/renderer/src/gl/render_context_gl_impl.cpp index 72a95b14..b2c6ef38 100644 --- a/renderer/src/gl/render_context_gl_impl.cpp +++ b/renderer/src/gl/render_context_gl_impl.cpp @@ -71,9 +71,13 @@ RenderContextGLImpl::RenderContextGLImpl(const char* rendererString, m_state(make_rcp(m_capabilities)) { - m_platformFeatures.supportsPixelLocalStorage = m_plsImpl != nullptr; - m_platformFeatures.supportsRasterOrdering = m_platformFeatures.supportsPixelLocalStorage && - m_plsImpl->supportsRasterOrdering(m_capabilities); + if (m_plsImpl != 
nullptr) + { + m_platformFeatures.supportsRasterOrdering = + m_plsImpl->supportsRasterOrdering(m_capabilities); + m_platformFeatures.supportsFragmentShaderAtomics = + m_plsImpl->supportsFragmentShaderAtomics(m_capabilities); + } if (m_capabilities.KHR_blend_equation_advanced_coherent) { m_platformFeatures.supportsKHRBlendEquations = true; diff --git a/renderer/src/metal/render_context_metal_impl.mm b/renderer/src/metal/render_context_metal_impl.mm index f1ac5b5d..bd720b41 100644 --- a/renderer/src/metal/render_context_metal_impl.mm +++ b/renderer/src/metal/render_context_metal_impl.mm @@ -290,6 +290,7 @@ void onUnmapAndSubmitBuffer(int bufferIdx, size_t mapSizeInBytes) override {} m_platformFeatures.invertOffscreenY = true; #ifdef RIVE_IOS m_platformFeatures.supportsRasterOrdering = true; + m_platformFeatures.supportsFragmentShaderAtomics = false; if (!is_apple_ios_silicon(m_gpu)) { // The PowerVR GPU, at least on A10, has fp16 precision issues. We can't use the the bottom @@ -299,9 +300,11 @@ void onUnmapAndSubmitBuffer(int bufferIdx, size_t mapSizeInBytes) override {} #elif defined(RIVE_IOS_SIMULATOR) // The simulator does not support framebuffer reads. Fall back on atomic mode. 
m_platformFeatures.supportsRasterOrdering = false; + m_platformFeatures.supportsFragmentShaderAtomics = true; #else m_platformFeatures.supportsRasterOrdering = [m_gpu supportsFamily:MTLGPUFamilyApple1] && !contextOptions.disableFramebufferReads; + m_platformFeatures.supportsFragmentShaderAtomics = true; #endif m_platformFeatures.atomicPLSMustBeInitializedAsDraw = true; diff --git a/renderer/src/render_context.cpp b/renderer/src/render_context.cpp index 67d961aa..59aaca40 100644 --- a/renderer/src/render_context.cpp +++ b/renderer/src/render_context.cpp @@ -223,9 +223,11 @@ void RenderContext::beginFrame(const FrameDescriptor& frameDescriptor) assert(frameDescriptor.renderTargetWidth > 0); assert(frameDescriptor.renderTargetHeight > 0); m_frameDescriptor = frameDescriptor; - if (!platformFeatures().supportsPixelLocalStorage) + if (!platformFeatures().supportsRasterOrdering && + !platformFeatures().supportsFragmentShaderAtomics) { - // Use 4x MSAA if we don't have pixel local storage and MSAA wasn't specified. + // We don't have pixel local storage in any form. Use 4x MSAA if + // msaaSampleCount wasn't already specified. m_frameDescriptor.msaaSampleCount = m_frameDescriptor.msaaSampleCount > 0 ? 
m_frameDescriptor.msaaSampleCount : 4; } @@ -233,12 +235,15 @@ void RenderContext::beginFrame(const FrameDescriptor& frameDescriptor) { m_frameInterlockMode = gpu::InterlockMode::depthStencil; } - else if (m_frameDescriptor.disableRasterOrdering || !platformFeatures().supportsRasterOrdering) + else if ((!platformFeatures().supportsRasterOrdering || + m_frameDescriptor.disableRasterOrdering) && + platformFeatures().supportsFragmentShaderAtomics) { m_frameInterlockMode = gpu::InterlockMode::atomics; } else { + assert(platformFeatures().supportsRasterOrdering); m_frameInterlockMode = gpu::InterlockMode::rasterOrdering; } m_frameShaderFeaturesMask = gpu::ShaderFeaturesMaskFor(m_frameInterlockMode); diff --git a/renderer/src/shaders/atomic_draw.glsl b/renderer/src/shaders/atomic_draw.glsl index b3d7355d..357a9737 100644 --- a/renderer/src/shaders/atomic_draw.glsl +++ b/renderer/src/shaders/atomic_draw.glsl @@ -340,7 +340,6 @@ half4 resolve_path_color(half coverageCount, } if (clipID != 0u) { - clipData = PLS_LOADUI(clipBuffer); half clipCoverage = clipID == (clipData >> 16u) ? 
unpackHalf2x16(clipData).r : .0; coverage = min(coverage, clipCoverage); } diff --git a/renderer/src/shaders/glsl.glsl b/renderer/src/shaders/glsl.glsl index f179f1f2..4f1dd597 100644 --- a/renderer/src/shaders/glsl.glsl +++ b/renderer/src/shaders/glsl.glsl @@ -338,7 +338,7 @@ layout(location = IDX) out lowp vec4 NAME #define PLS_DECLUI(IDX, NAME) \ layout(input_attachment_index = IDX, binding = IDX, set = PLS_TEXTURE_BINDINGS_SET) \ - uniform lowp usubpassInput _in_##NAME; \ + uniform highp usubpassInput _in_##NAME; \ layout(location = IDX) out highp uvec4 NAME #define PLS_BLOCK_END diff --git a/renderer/src/vulkan/render_context_vulkan_impl.cpp b/renderer/src/vulkan/render_context_vulkan_impl.cpp index d3367032..ccf785bd 100644 --- a/renderer/src/vulkan/render_context_vulkan_impl.cpp +++ b/renderer/src/vulkan/render_context_vulkan_impl.cpp @@ -1682,10 +1682,21 @@ RenderContextVulkanImpl::RenderContextVulkanImpl(VkInstance instance, m_colorRampPipeline(std::make_unique(m_vk)), m_tessellatePipeline(std::make_unique(m_vk)) { - m_platformFeatures.supportsPixelLocalStorage = features.fragmentStoresAndAtomics; m_platformFeatures.supportsRasterOrdering = features.rasterizationOrderColorAttachmentAccess; + m_platformFeatures.supportsFragmentShaderAtomics = features.fragmentStoresAndAtomics; m_platformFeatures.invertOffscreenY = false; m_platformFeatures.uninvertOnScreenY = true; + + VkPhysicalDeviceProperties physicalDeviceProperties; + m_vk->GetPhysicalDeviceProperties(m_vk->physicalDevice, &physicalDeviceProperties); + if (physicalDeviceProperties.vendorID == vkutil::kVendorQualcomm) + { + // Qualcomm advertises EXT_rasterization_order_attachment_access, but it's + // slow. Use atomics instead on this platform. + m_platformFeatures.supportsRasterOrdering = false; + // Pixel4 struggles with fine-grained fp16 path IDs. 
+ m_platformFeatures.pathIDGranularity = 2; + } } void RenderContextVulkanImpl::initGPUObjects() @@ -2098,7 +2109,7 @@ void RenderContextVulkanImpl::flush(const FlushDescriptor& desc) { if (desc.interlockMode == gpu::InterlockMode::depthStencil) { - return; // TODO: support MSAA. + return; } auto commandBuffer = reinterpret_cast(desc.externalCommandBuffer); @@ -2915,7 +2926,8 @@ std::unique_ptr RenderContextVulkanImpl::MakeContext( features, fp_vkGetInstanceProcAddr, fp_vkGetDeviceProcAddr)); - if (!impl->platformFeatures().supportsPixelLocalStorage) + if (!impl->platformFeatures().supportsRasterOrdering && + !impl->platformFeatures().supportsFragmentShaderAtomics) { return nullptr; // TODO: implement MSAA. } diff --git a/renderer/src/vulkan/vkutil.cpp b/renderer/src/vulkan/vkutil.cpp index ae826c15..ddd9665b 100644 --- a/renderer/src/vulkan/vkutil.cpp +++ b/renderer/src/vulkan/vkutil.cpp @@ -189,7 +189,6 @@ Texture::Texture(rcp vk, const VkImageCreateInfo& info) : &m_vmaAllocation, nullptr) == VK_SUCCESS) { - printf("SUCCESS AT TRANSIENT LAZY!\n"); return; } }