Skip to content

Commit

Permalink
envmap filtering uses dynamic mip selection to reduce fireflies
Browse files Browse the repository at this point in the history
  • Loading branch information
turanszkij committed Sep 3, 2023
1 parent f886379 commit bb51947
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 38 deletions.
66 changes: 59 additions & 7 deletions WickedEngine/shaders/filterEnvMapCS.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,32 @@

PUSHCONSTANT(push, FilterEnvmapPushConstants);

// GGX (Trowbridge-Reitz) normal distribution term.
//	NdotH     : cosine of the angle between the surface normal and the half vector
//	roughness : used directly as the GGX alpha, it is not squared in here
// Algebraically the result is:
//	alpha^2 / (PI * (NdotH^2 * (alpha^2 - 1) + 1)^2)
// Note: the denominator is not guarded, it becomes zero when NdotH == 1 and roughness == 0
float D_GGX(float NdotH, float roughness)
{
	const float scaledCos = NdotH * roughness;
	const float denom = 1.0 - NdotH * NdotH + scaledCos * scaledCos;
	const float ratio = roughness / denom;
	return ratio * ratio * (1.0 / PI);
}

// From "Real Shading in Unreal Engine 4" by Brian Karis, page 4
// https://blog.selfshadow.com/publications/s2013-shading-course/karis/s2013_pbs_epic_notes_v2.pdf
float3 ImportanceSampleGGX(float2 Xi, float Roughness, float3 N)
float4 ImportanceSampleGGX(float2 Xi, float Roughness, float3 N)
{
float a = Roughness * Roughness;
float Phi = 2 * PI * Xi.x;
float CosTheta = sqrt((1 - Xi.y) / (1 + (a * a - 1) * Xi.y));
float SinTheta = sqrt(1 - CosTheta * CosTheta);

// Additional PDF:
// https://github.com/KhronosGroup/glTF-Sample-Viewer/blob/main/source/shaders/ibl_filtering.frag
float pdf = D_GGX(CosTheta, a);
// Apply the Jacobian to obtain a pdf that is parameterized by l
// see https://bruop.github.io/ibl/
// Typically you'd have the following:
// float pdf = D_GGX(NoH, roughness) * NoH / (4.0 * VoH);
// but since V = N => VoH == NoH
pdf /= 4.0;

float3 H;
H.x = SinTheta * cos(Phi);
H.y = SinTheta * sin(Phi);
Expand All @@ -19,11 +37,40 @@ float3 ImportanceSampleGGX(float2 Xi, float Roughness, float3 N)
float3 TangentX = normalize(cross(UpVector, N));
float3 TangentY = cross(N, TangentX);
// Tangent to world space
return TangentX * H.x + TangentY * H.y + N * H.z;
return float4(TangentX * H.x + TangentY * H.y + N * H.z, pdf);
}

// Selects the mip level to sample for one importance sample, based on the sample's pdf.
// References:
//	Mipmap Filtered Samples (GPU Gems 3, 20.4)
//	https://developer.nvidia.com/gpugems/gpugems3/part-iii-rendering/chapter-20-gpu-based-importance-sampling
//	https://cgg.mff.cuni.cz/~jaroslav/papers/2007-sketch-fis/Final_sap_0073.pdf
//
//	pdf         : probability density of the sample direction
//	width       : edge length of a cube face in texels
//	sampleCount : total number of samples that are taken
//
// Background of the formula:
//	omegaS = 1 / (sampleCount * pdf)        solid angle of the sample, bigger for less likely samples
//	omegaP = 4 * PI / (6 * width * width)   solid angle of a texel
//	lod    = 0.5 * log2(omegaS / omegaP)    mip level from the ratio, 0.5 * log2 is equivalent to log4
// The expression evaluated below is the Krivanek & Colbert formulation adapted to cubemaps,
//	it does not include the 4 * PI factor of omegaP.
// The additional factor of K (= 4) that babylon applies to the solid angle ratio (lod += 1)
//	is also not applied here.
float computeLod(float pdf, uint width, uint sampleCount)
{
	const float faceEdge = float(width);
	const float cubeTexels = 6.0 * faceEdge * faceEdge;
	const float weightedSamples = float(sampleCount) * pdf;
	return 0.5 * log2(cubeTexels / weightedSamples);
}

static const uint THREAD_OFFLOAD = 16;
groupshared uint2 shared_colors[GENERATEMIPCHAIN_2D_BLOCK_SIZE][GENERATEMIPCHAIN_2D_BLOCK_SIZE][THREAD_OFFLOAD];
groupshared float4 shared_colors[GENERATEMIPCHAIN_2D_BLOCK_SIZE][GENERATEMIPCHAIN_2D_BLOCK_SIZE][THREAD_OFFLOAD];

[numthreads(GENERATEMIPCHAIN_2D_BLOCK_SIZE, GENERATEMIPCHAIN_2D_BLOCK_SIZE, THREAD_OFFLOAD)]
void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID)
Expand All @@ -45,25 +92,30 @@ void main(uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID)
for (uint i = threadstart; i < push.filterRayCount; i += THREAD_OFFLOAD)
{
float2 Xi = hammersley2d(i, push.filterRayCount);
float3 H = ImportanceSampleGGX(Xi, push.filterRoughness, N);
float4 importanceSample = ImportanceSampleGGX(Xi, push.filterRoughness, N);
float3 H = importanceSample.xyz;
float pdf = importanceSample.w;
float3 L = 2 * dot(V, H) * H - V;

float NoL = saturate(dot(N, L));
if (NoL > 0)
{
col += input.SampleLevel(sampler_linear_clamp, L, 0) * NoL;
uint2 dim;
input.GetDimensions(dim.x, dim.y); // input to computeLod needs to be resolution of top mip, not the current filter resolution
float lod = computeLod(pdf, dim.x, push.filterRayCount);
col += input.SampleLevel(sampler_linear_clamp, L, lod) * NoL;
}
}

shared_colors[GTid.x][GTid.y][threadstart] = pack_half4(col);
shared_colors[GTid.x][GTid.y][threadstart] = col;
GroupMemoryBarrierWithGroupSync();

if(threadstart == 0)
{
float4 accum = 0;
for (uint j = 0; j < THREAD_OFFLOAD;++j)
{
accum += unpack_half4(shared_colors[GTid.x][GTid.y][j]);
accum += shared_colors[GTid.x][GTid.y][j];
}
accum /= accum.a;
output[uint3(DTid.xy, face)] = accum;
Expand Down
3 changes: 2 additions & 1 deletion WickedEngine/wiRenderPath3D_PathTracing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,8 @@ namespace wi
nullptr,
getEyeAdaptionEnabled() ? &luminanceResources.luminance : nullptr,
getBloomEnabled() ? &bloomResources.texture_bloom : nullptr,
colorspace
colorspace,
getTonemap()
);
lastPostprocessRT = &rtPostprocess;

Expand Down
104 changes: 75 additions & 29 deletions WickedEngine/wiRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7385,6 +7385,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
Texture envrenderingDepthBuffer;
Texture envrenderingColorBuffer_MSAA;
Texture envrenderingColorBuffer;
Texture envrenderingColorBuffer_Filtered;

// Find temporary render textures to fit request, or create new ones if they don't exist:
union RenderTextureID
Expand All @@ -7394,6 +7395,7 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
uint32_t width : 16;
uint32_t sample_count : 3;
uint32_t is_depth : 1;
uint32_t is_filtered : 1;
} bits;
uint32_t raw;
};
Expand All @@ -7407,14 +7409,23 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
id_depth.bits.width = probe.resolution;
id_depth.bits.sample_count = required_sample_count;
id_depth.bits.is_depth = 1;
id_depth.bits.is_filtered = 0;
envrenderingDepthBuffer = render_textures[id_depth.raw];

RenderTextureID id_color = {};
id_color.bits.width = probe.resolution;
id_color.bits.sample_count = 1;
id_color.bits.is_depth = 0;
id_color.bits.is_filtered = 0;
envrenderingColorBuffer = render_textures[id_color.raw];

RenderTextureID id_color_filtered = {};
id_color_filtered.bits.width = probe.resolution;
id_color_filtered.bits.sample_count = 1;
id_color_filtered.bits.is_depth = 0;
id_color_filtered.bits.is_filtered = 1;
envrenderingColorBuffer_Filtered = render_textures[id_color_filtered.raw];

TextureDesc desc;
desc.array_size = 6;
desc.height = probe.resolution;
Expand Down Expand Up @@ -7480,12 +7491,46 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
wi::backlog::post(info);
}

if (!envrenderingColorBuffer_Filtered.IsValid())
{
desc.mip_levels = probe.texture.desc.mip_levels;
desc.bind_flags = BindFlag::SHADER_RESOURCE | BindFlag::UNORDERED_ACCESS;
desc.format = wi::renderer::format_rendertarget_envprobe;
desc.layout = ResourceState::SHADER_RESOURCE;
desc.misc_flags = ResourceMiscFlag::TEXTURECUBE;
desc.sample_count = 1;
device->CreateTexture(&desc, nullptr, &envrenderingColorBuffer_Filtered);
device->SetName(&envrenderingColorBuffer_Filtered, "envrenderingColorBuffer_Filtered");
render_textures[id_color_filtered.raw] = envrenderingColorBuffer_Filtered;

// Cubes per mip level:
for (uint32_t i = 0; i < envrenderingColorBuffer_Filtered.desc.mip_levels; ++i)
{
int subresource_index;
subresource_index = device->CreateSubresource(&envrenderingColorBuffer_Filtered, SubresourceType::SRV, 0, envrenderingColorBuffer_Filtered.desc.array_size, i, 1);
assert(subresource_index == i);
subresource_index = device->CreateSubresource(&envrenderingColorBuffer_Filtered, SubresourceType::UAV, 0, envrenderingColorBuffer_Filtered.desc.array_size, i, 1);
assert(subresource_index == i);
}

std::string info;
info += "Created envprobe filtering target for request";
info += "\n\tResolution = " + std::to_string(desc.width) + " * " + std::to_string(desc.height) + " * 6";
info += "\n\tSample Count = " + std::to_string(desc.sample_count);
info += "\n\tMip Levels = " + std::to_string(desc.mip_levels);
info += "\n\tFormat = ";
info += GetFormatString(desc.format);
info += "\n\tMemory = " + wi::helper::GetMemorySizeText(ComputeTextureMemorySizeInBytes(desc)) + "\n";
wi::backlog::post(info);
}

if (required_sample_count > 1)
{
RenderTextureID id_color_msaa = {};
id_color_msaa.bits.width = probe.resolution;
id_color_msaa.bits.sample_count = required_sample_count;
id_color_msaa.bits.is_depth = 0;
id_color_msaa.bits.is_filtered = 0;
envrenderingColorBuffer_MSAA = render_textures[id_color_msaa.raw];

if (!envrenderingColorBuffer_MSAA.IsValid())
Expand Down Expand Up @@ -7762,7 +7807,24 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
// and we generate the filtered MIPs from bottom to top.
device->EventBegin("FilterEnvMap", cmd);
{
TextureDesc desc = envrenderingColorBuffer.GetDesc();
// Copy over whole:
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&envrenderingColorBuffer, envrenderingColorBuffer.desc.layout, ResourceState::COPY_SRC),
GPUBarrier::Image(&envrenderingColorBuffer_Filtered, envrenderingColorBuffer_Filtered.desc.layout, ResourceState::COPY_DST),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
device->CopyResource(&envrenderingColorBuffer_Filtered, &envrenderingColorBuffer, cmd);
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::COPY_SRC, envrenderingColorBuffer.desc.layout),
GPUBarrier::Image(&envrenderingColorBuffer_Filtered, ResourceState::COPY_DST, ResourceState::UNORDERED_ACCESS),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}

TextureDesc desc = envrenderingColorBuffer_Filtered.GetDesc();

device->BindComputeShader(&shaders[CSTYPE_FILTERENVMAP], cmd);

Expand All @@ -7771,18 +7833,6 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
desc.height = std::max(1u, desc.height >> mip_start);
for (int i = mip_start; i > 0; --i)
{
{
GPUBarrier barriers[] = {
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 0),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 1),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 2),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 3),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 4),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::SHADER_RESOURCE, ResourceState::UNORDERED_ACCESS, i, 5),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}

FilterEnvmapPushConstants push;
push.filterResolution.x = desc.width;
push.filterResolution.y = desc.height;
Expand All @@ -7797,36 +7847,32 @@ void RefreshEnvProbes(const Visibility& vis, CommandList cmd)
{
push.filterRayCount = 8192;
}
push.texture_input = device->GetDescriptorIndex(&envrenderingColorBuffer, SubresourceType::SRV, std::max(0, (int)i - 1));
push.texture_output = device->GetDescriptorIndex(&envrenderingColorBuffer, SubresourceType::UAV, i);
push.texture_input = device->GetDescriptorIndex(&envrenderingColorBuffer, SubresourceType::SRV);
push.texture_output = device->GetDescriptorIndex(&envrenderingColorBuffer_Filtered, SubresourceType::UAV, i);
device->PushConstants(&push, sizeof(push), cmd);

device->Dispatch(
(desc.width + GENERATEMIPCHAIN_2D_BLOCK_SIZE - 1) / GENERATEMIPCHAIN_2D_BLOCK_SIZE,
(desc.height + GENERATEMIPCHAIN_2D_BLOCK_SIZE - 1) / GENERATEMIPCHAIN_2D_BLOCK_SIZE,
6,
cmd);

{
GPUBarrier barriers[] = {
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 0),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 1),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 2),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 3),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 4),
GPUBarrier::Image(&envrenderingColorBuffer, ResourceState::UNORDERED_ACCESS, ResourceState::SHADER_RESOURCE, i, 5),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
cmd
);

desc.width *= 2;
desc.height *= 2;
}

{
GPUBarrier barriers[] = {
GPUBarrier::Image(&envrenderingColorBuffer_Filtered, ResourceState::UNORDERED_ACCESS, envrenderingColorBuffer_Filtered.desc.layout),
};
device->Barrier(barriers, arraysize(barriers), cmd);
}
}
device->EventEnd(cmd);

// Finally, the complete envmap is block compressed into the probe's texture:
BlockCompress(envrenderingColorBuffer, probe.texture, cmd);
BlockCompress(envrenderingColorBuffer_Filtered, probe.texture, cmd);
};

if (vis.scene->probes.GetCount() == 0)
Expand Down
2 changes: 1 addition & 1 deletion WickedEngine/wiVersion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ namespace wi::version
// minor features, major updates, breaking compatibility changes
const int minor = 71;
// minor bug fixes, alterations, refactors, updates
const int revision = 284;
const int revision = 285;

const std::string version_string = std::to_string(major) + "." + std::to_string(minor) + "." + std::to_string(revision);

Expand Down

0 comments on commit bb51947

Please sign in to comment.