diff --git a/WickedEngine/shaders/filterEnvMapCS.hlsl b/WickedEngine/shaders/filterEnvMapCS.hlsl index 54dcbae86c..c0e580458f 100644 --- a/WickedEngine/shaders/filterEnvMapCS.hlsl +++ b/WickedEngine/shaders/filterEnvMapCS.hlsl @@ -3,14 +3,32 @@ PUSHCONSTANT(push, FilterEnvmapPushConstants); +float D_GGX(float NdotH, float roughness) +{ + float a = NdotH * roughness; + float k = roughness / (1.0 - NdotH * NdotH + a * a); + return k * k * (1.0 / PI); +} + // From "Real Shading in UnrealEngine 4" by Brian Karis, page 4 // https://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf -float3 ImportanceSampleGGX(float2 Xi, float Roughness, float3 N) +float4 ImportanceSampleGGX(float2 Xi, float Roughness, float3 N) { float a = Roughness * Roughness; float Phi = 2 * PI * Xi.x; float CosTheta = sqrt((1 - Xi.y) / (1 + (a * a - 1) * Xi.y)); float SinTheta = sqrt(1 - CosTheta * CosTheta); + + // Additional PDF: + // https://github.com/KhronosGroup/glTF-Sample-Viewer/blob/main/source/shaders/ibl_filtering.frag + float pdf = D_GGX(CosTheta, a); + // Apply the Jacobian to obtain a pdf that is parameterized by l + // see https://bruop.github.io/ibl/ + // Typically you'd have the following: + // float pdf = D_GGX(NoH, roughness) * NoH / (4.0 * VoH); + // but since V = N => VoH == NoH + pdf /= 4.0; + float3 H; H.x = SinTheta * cos(Phi); H.y = SinTheta * sin(Phi); @@ -19,11 +37,40 @@ float3 ImportanceSampleGGX(float2 Xi, float Roughness, float3 N) float3 TangentX = normalize(cross(UpVector, N)); float3 TangentY = cross(N, TangentX); // Tangent to world space - return TangentX * H.x + TangentY * H.y + N * H.z; + return float4(TangentX * H.x + TangentY * H.y + N * H.z, pdf); +} + +// Mipmap Filtered Samples (GPU Gems 3, 20.4) +// https://developer.nvidia.com/gpugems/gpugems3/part-iii-rendering/chapter-20-gpu-based-importance-sampling +// https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf +float computeLod(float pdf, uint width, uint sampleCount) +{ + // // Solid angle of current sample -- bigger for less likely samples + // float omegaS = 1.0 / (float(u_sampleCount) * pdf); + // // Solid angle of texel + // // note: the factor of 4.0 * MATH_PI + // float omegaP = 4.0 * MATH_PI / (6.0 * float(u_width) * float(u_width)); + // // Mip level is determined by the ratio of our sample's solid angle to a texel's solid angle + // // note that 0.5 * log2 is equivalent to log4 + // float lod = 0.5 * log2(omegaS / omegaP); + + // babylon introduces a factor of K (=4) to the solid angle ratio + // this helps to avoid undersampling the environment map + // this does not appear in the original formulation by Jaroslav Krivanek and Mark Colbert + // log4(4) == 1 + // lod += 1.0; + + // We achieved good results by using the original formulation from Krivanek & Colbert adapted to cubemaps + + // https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf + float lod = 0.5 * log2(6.0 * float(width) * float(width) / (float(sampleCount) * pdf)); + + + return lod; } static const uint THREAD_OFFLOAD = 16; -groupshared uint2 shared_colors[GENERATEMIPCHAIN_2D_BLOCK_SIZE][GENERATEMIPCHAIN_2D_BLOCK_SIZE][THREAD_OFFLOAD]; +groupshared float4 shared_colors[GENERATEMIPCHAIN_2D_BLOCK_SIZE][GENERATEMIPCHAIN_2D_BLOCK_SIZE][THREAD_OFFLOAD]; [numthreads(GENERATEMIPCHAIN_2D_BLOCK_SIZE, GENERATEMIPCHAIN_2D_BLOCK_SIZE, THREAD_OFFLOAD)] void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID) @@ -45,17 +92,22 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID) for (uint i = threadstart; i < push.filterRayCount; i += THREAD_OFFLOAD) { float2 Xi = hammersley2d(i, push.filterRayCount); - float3 H = ImportanceSampleGGX(Xi, push.filterRoughness, N); + float4 importanceSample = ImportanceSampleGGX(Xi, push.filterRoughness, N); + float3 H = importanceSample.xyz; + float pdf = importanceSample.w; float3 L = 2 * dot(V, H) * H - V; float NoL = saturate(dot(N, L)); if (NoL > 0) { - col += input.SampleLevel(sampler_linear_clamp, L, 0) * NoL; + uint2 dim; + input.GetDimensions(dim.x, dim.y); // input to computeLod needs to be resolution of top mip, not the current filter resolution + float lod = computeLod(pdf, dim.x, push.filterRayCount); + col += input.SampleLevel(sampler_linear_clamp, L, lod) * NoL; } } - shared_colors[GTid.x][GTid.y][threadstart] = pack_half4(col); + shared_colors[GTid.x][GTid.y][threadstart] = col; GroupMemoryBarrierWithGroupSync(); if(threadstart == 0) @@ -63,7 +115,7 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID) float4 accum = 0; for (uint j = 0; j < THREAD_OFFLOAD;++j) { - accum += unpack_half4(shared_colors[GTid.x][GTid.y][j]); + accum += shared_colors[GTid.x][GTid.y][j]; } accum /= accum.a; output[uint3(DTid.xy, face)] = accum; diff --git a/WickedEngine/wiRenderPath3D_PathTracing.cpp b/WickedEngine/wiRenderPath3D_PathTracing.cpp index 06f6351f99..12b12264f4 100644 --- a/WickedEngine/wiRenderPath3D_PathTracing.cpp +++ b/WickedEngine/wiRenderPath3D_PathTracing.cpp @@ -452,7 +452,8 @@ namespace wi nullptr, getEyeAdaptionEnabled() ? &luminanceResources.luminance : nullptr, getBloomEnabled() ? &bloomResources.texture_bloom : nullptr, - colorspace + colorspace, + getTonemap() ); lastPostprocessRT = &rtPostprocess; diff --git a/WickedEngine/wiRenderer.cpp b/WickedEngine/wiRenderer.cpp index 02f2875784..40fc855b6c 100644 --- a/WickedEngine/wiRenderer.cpp +++ b/WickedEngine/wiRenderer.cpp @@ -7385,6 +7385,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) Texture envrenderingDepthBuffer; Texture envrenderingColorBuffer_MSAA; Texture envrenderingColorBuffer; + Texture envrenderingColorBuffer_Filtered; // Find temporary render textures to fit request, or create new ones if they don't exist: union RenderTextureID @@ -7394,6 +7395,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) uint32_t width : 16; uint32_t sample_count : 3; uint32_t is_depth : 1; + uint32_t is_filtered : 1; } bits; uint32_t raw; }; @@ -7407,14 +7409,23 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) id_depth.bits.width = probe.resolution; id_depth.bits.sample_count = required_sample_count; id_depth.bits.is_depth = 1; + id_depth.bits.is_filtered = 0; envrenderingDepthBuffer = render_textures[id_depth.raw]; RenderTextureID id_color = {}; id_color.bits.width = probe.resolution; id_color.bits.sample_count = 1; id_color.bits.is_depth = 0; + id_color.bits.is_filtered = 0; envrenderingColorBuffer = render_textures[id_color.raw]; + RenderTextureID id_color_filtered = {}; + id_color_filtered.bits.width = probe.resolution; + id_color_filtered.bits.sample_count = 1; + id_color_filtered.bits.is_depth = 0; + id_color_filtered.bits.is_filtered = 1; + envrenderingColorBuffer_Filtered = render_textures[id_color_filtered.raw]; + TextureDesc desc; desc.array_size = 6; desc.height = probe.resolution; @@ -7480,12 +7491,46 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) wi::backlog::post(info); } + if (!envrenderingColorBuffer_Filtered.IsValid()) + { + desc.mip_levels = probe.texture.desc.mip_levels; + desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS; + desc.format = wi::renderer::format_rendertarget_envprobe; + desc.layout = ResourceState::SHADER_RESOURCE; + desc.misc_flags = ResourceMiscFlag::TEXTURECUBE; + desc.sample_count = 1; + device->CreateTexture(&desc, nullptr, &envrenderingColorBuffer_Filtered); + device->SetName(&envrenderingColorBuffer_Filtered, "envrenderingColorBuffer_Filtered"); + render_textures[id_color_filtered.raw] = envrenderingColorBuffer_Filtered; + + // Cubes per mip level: + for (uint32_t i = 0; i < envrenderingColorBuffer_Filtered.desc.mip_levels; ++i) + { + int subresource_index; + subresource_index = device->CreateSubresource(&envrenderingColorBuffer_Filtered, SubresourceType::SRV, 0, envrenderingColorBuffer_Filtered.desc.array_size, i, 1); + assert(subresource_index == i); + subresource_index = device->CreateSubresource(&envrenderingColorBuffer_Filtered, SubresourceType::UAV, 0, envrenderingColorBuffer_Filtered.desc.array_size, i, 1); + assert(subresource_index == i); + } + + std::string info; + info += "Created envprobe filtering target for request"; + info += "\n\tResolution = " + std::to_string(desc.width) + " * " + std::to_string(desc.height) + " * 6"; + info += "\n\tSample Count = " + std::to_string(desc.sample_count); + info += "\n\tMip Levels = " + std::to_string(desc.mip_levels); + info += "\n\tFormat = "; + info += GetFormatString(desc.format); + info += "\n\tMemory = " + wi::helper::GetMemorySizeText(ComputeTextureMemorySizeInBytes(desc)) + "\n"; + wi::backlog::post(info); + } + if (required_sample_count > 1) { RenderTextureID id_color_msaa = {}; id_color_msaa.bits.width = probe.resolution; id_color_msaa.bits.sample_count = required_sample_count; id_color_msaa.bits.is_depth = 0; + id_color_msaa.bits.is_filtered = 0; envrenderingColorBuffer_MSAA = render_textures[id_color_msaa.raw]; if (!envrenderingColorBuffer_MSAA.IsValid()) @@ -7762,7 +7807,24 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) // and we generatethe filtered MIPs from bottom to top. device->EventBegin("FilterEnvMap", cmd); { - TextureDesc desc = envrenderingColorBuffer.GetDesc(); + // Copy over whole: + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&envrenderingColorBuffer, envrenderingColorBuffer.desc.layout, ResourceState::COPY_SRC), + GPUBarrier::Image(&envrenderingColorBuffer_Filtered, envrenderingColorBuffer_Filtered.desc.layout, ResourceState::COPY_DST), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + device->CopyResource(&envrenderingColorBuffer_Filtered, &envrenderingColorBuffer, cmd); + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::COPY_SRC, envrenderingColorBuffer.desc.layout), + GPUBarrier::Image(&envrenderingColorBuffer_Filtered, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } + + TextureDesc desc = envrenderingColorBuffer_Filtered.GetDesc(); device->BindComputeShader(&shaders[CSTYPE_FILTERENVMAP], cmd); @@ -7771,18 +7833,6 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) desc.height = std::max(1u, desc.height >> mip_start); for (int i = mip_start; i > 0; --i) { - { - GPUBarrier barriers[] = { - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 0), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 1), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 2), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 3), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 4), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 5), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } - FilterEnvmapPushConstants push; push.filterResolution.x = desc.width; push.filterResolution.y = desc.height; @@ -7797,36 +7847,32 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd) { push.filterRayCount = 8192; } - push.texture_input = device->GetDescriptorIndex(&envrenderingColorBuffer, SubresourceType::SRV, std::max(0, (int)i - 1)); - push.texture_output = device->GetDescriptorIndex(&envrenderingColorBuffer, SubresourceType::UAV, i); + push.texture_input = device->GetDescriptorIndex(&envrenderingColorBuffer, SubresourceType::SRV); + push.texture_output = device->GetDescriptorIndex(&envrenderingColorBuffer_Filtered, SubresourceType::UAV, i); device->PushConstants(&push, sizeof(push), cmd); device->Dispatch( (desc.width + GENERATEMIPCHAIN_2D_BLOCK_SIZE - 1) / GENERATEMIPCHAIN_2D_BLOCK_SIZE, (desc.height + GENERATEMIPCHAIN_2D_BLOCK_SIZE - 1) / GENERATEMIPCHAIN_2D_BLOCK_SIZE, 6, - cmd); - - { - GPUBarrier barriers[] = { - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 0), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 1), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 2), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 3), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 4), - GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 5), - }; - device->Barrier(barriers, arraysize(barriers), cmd); - } + cmd + ); desc.width *= 2; desc.height *= 2; } + + { + GPUBarrier barriers[] = { + GPUBarrier::Image(&envrenderingColorBuffer_Filtered, ResourceState::UNORDERED_ACCESS, envrenderingColorBuffer_Filtered.desc.layout), + }; + device->Barrier(barriers, arraysize(barriers), cmd); + } } device->EventEnd(cmd); // Finally, the complete envmap is block compressed into the probe's texture: - BlockCompress(envrenderingColorBuffer, probe.texture, cmd); + BlockCompress(envrenderingColorBuffer_Filtered, probe.texture, cmd); }; if (vis.scene->probes.GetCount() == 0) diff --git a/WickedEngine/wiVersion.cpp b/WickedEngine/wiVersion.cpp index 2e9c2411bf..a6c7937bff 100644 --- a/WickedEngine/wiVersion.cpp +++ b/WickedEngine/wiVersion.cpp @@ -9,7 +9,7 @@ namespace wi::version // minor features, major updates, breaking compatibility changes const int minor = 71; // minor bug fixes, alterations, refactors, updates - const int revision = 284; + const int revision = 285; const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);