From 1ef4a902ac4b48a3eaf264a4acab6433a927fd2c Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 3 Mar 2020 11:24:52 -0800 Subject: [PATCH 01/48] Replaced regular temporal AA followed by upscale with TAAU. --- src/refresh/vkpt/asvgf.c | 8 +-- src/refresh/vkpt/bloom.c | 26 ++++----- src/refresh/vkpt/draw.c | 2 +- src/refresh/vkpt/main.c | 56 +++++++++++++++---- src/refresh/vkpt/shader/asvgf_taa.comp | 32 ++++++++--- src/refresh/vkpt/shader/bloom_blur.comp | 4 +- src/refresh/vkpt/shader/bloom_composite.comp | 4 +- src/refresh/vkpt/shader/global_textures.h | 18 +++--- src/refresh/vkpt/shader/global_ubo.h | 12 ++-- src/refresh/vkpt/shader/primary_rays.rgen | 13 +++-- .../vkpt/shader/tone_mapping_apply.comp | 2 +- .../vkpt/shader/tone_mapping_histogram.comp | 4 +- src/refresh/vkpt/tone_mapping.c | 8 +-- 13 files changed, 121 insertions(+), 68 deletions(-) diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index 9bd08ff89..1754d619b 100644 --- a/src/refresh/vkpt/asvgf.c +++ b/src/refresh/vkpt/asvgf.c @@ -566,13 +566,7 @@ VkResult vkpt_taa(VkCommandBuffer cmd_buf) vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_taa, 0, LENGTH(desc_sets), desc_sets, 0, 0); - VkExtent2D dispatch_size = qvk.extent_render; - - if(dispatch_size.width < qvk.extent_screen_images.width) - dispatch_size.width += 8; - - if (dispatch_size.height < qvk.extent_screen_images.height) - dispatch_size.height += 8; + VkExtent2D dispatch_size = qvk.extent_unscaled; vkCmdDispatch(cmd_buf, (dispatch_size.width + 15) / 16, diff --git a/src/refresh/vkpt/bloom.c b/src/refresh/vkpt/bloom.c index a602afe15..b93a98451 100644 --- a/src/refresh/vkpt/bloom.c +++ b/src/refresh/vkpt/bloom.c @@ -55,7 +55,7 @@ static float under_water_animation; static void compute_push_constants() { - float sigma_pixels = bloom_sigma * qvk.extent_render.height; + float sigma_pixels = bloom_sigma * qvk.extent_unscaled.height; float effective_sigma = sigma_pixels * 0.25f; 
effective_sigma = min(effective_sigma, 100.f); @@ -320,20 +320,20 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) }; VkOffset3D offset_LR_mip_0 = { - .x = qvk.extent_screen_images.width, - .y = qvk.extent_screen_images.height, + .x = qvk.extent_unscaled.width, + .y = qvk.extent_unscaled.height, .z = 1 }; VkOffset3D offset_LR_mip_1 = { - .x = qvk.extent_screen_images.width / 2, - .y = qvk.extent_screen_images.height / 2, + .x = qvk.extent_unscaled.width / 2, + .y = qvk.extent_unscaled.height / 2, .z = 1 }; VkOffset3D offset_LR_mip_2 = { - .x = qvk.extent_screen_images.width / 4, - .y = qvk.extent_screen_images.height / 4, + .x = qvk.extent_unscaled.width / 4, + .y = qvk.extent_unscaled.height / 4, .z = 1 }; @@ -395,8 +395,8 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) vkCmdPushConstants(cmd_buf, pipeline_layout_blur, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_hblur), &push_constants_hblur); vkCmdDispatch(cmd_buf, - (qvk.extent_render.width / 4 + 15) / 16, - (qvk.extent_render.height / 4 + 15) / 16, + (qvk.extent_unscaled.width / 4 + 15) / 16, + (qvk.extent_unscaled.height / 4 + 15) / 16, 1); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_BLOOM_HBLUR]); @@ -405,8 +405,8 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) vkCmdPushConstants(cmd_buf, pipeline_layout_blur, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_vblur), &push_constants_vblur); vkCmdDispatch(cmd_buf, - (qvk.extent_render.width / 4 + 15) / 16, - (qvk.extent_render.height / 4 + 15) / 16, + (qvk.extent_unscaled.width / 4 + 15) / 16, + (qvk.extent_unscaled.height / 4 + 15) / 16, 1); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_BLOOM_VBLUR]); @@ -476,8 +476,8 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_composite, 0, LENGTH(desc_sets), desc_sets, 0, 0); vkCmdDispatch(cmd_buf, - (qvk.extent_render.width + 15) / 16, - (qvk.extent_render.height + 15) / 16, + 
(qvk.extent_unscaled.width + 15) / 16, + (qvk.extent_unscaled.height + 15) / 16, 1); } diff --git a/src/refresh/vkpt/draw.c b/src/refresh/vkpt/draw.c index 70637dbe5..a99f2c54b 100644 --- a/src/refresh/vkpt/draw.c +++ b/src/refresh/vkpt/draw.c @@ -563,7 +563,7 @@ vkpt_final_blit_simple(VkCommandBuffer cmd_buf) ); VkOffset3D blit_size = { - .x = qvk.extent_render.width,.y = qvk.extent_render.height,.z = 1 + .x = qvk.extent_unscaled.width,.y = qvk.extent_unscaled.height,.z = 1 }; VkOffset3D blit_size_unscaled = { .x = qvk.extent_unscaled.width,.y = qvk.extent_unscaled.height,.z = 1 diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index fd5e1c879..a8731a190 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -104,6 +104,9 @@ static qboolean frame_ready = qfalse; static float sky_rotation = 0.f; static vec3_t sky_axis = { 0.f }; +#define NUM_TAA_SAMPLES 128 +static vec2_t taa_samples[NUM_TAA_SAMPLES]; + typedef enum { VKPT_INIT_DEFAULT = (0), VKPT_INIT_SWAPCHAIN_RECREATE = (1 << 1), @@ -2104,6 +2107,10 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->prev_height = qvk.extent_render_prev.height; ubo->inv_width = 1.0f / (float)qvk.extent_render.width; ubo->inv_height = 1.0f / (float)qvk.extent_render.height; + ubo->unscaled_width = qvk.extent_unscaled.width; + ubo->unscaled_height = qvk.extent_unscaled.height; + ubo->inv_unscaled_width = 1.0f / ubo->unscaled_width; + ubo->inv_unscaled_height = 1.0f / ubo->unscaled_height; ubo->current_gpu_slice_width = qvk.gpu_slice_width; ubo->prev_gpu_slice_width = qvk.gpu_slice_width_prev; ubo->screen_image_width = qvk.extent_screen_images.width; @@ -2205,6 +2212,18 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->flt_taa = 0; } + if (ubo->flt_taa) + { + int taa_index = (int)(qvk.frame_counter % NUM_TAA_SAMPLES); + ubo->sub_pixel_jitter[0] = taa_samples[taa_index][0]; + ubo->sub_pixel_jitter[1] = taa_samples[taa_index][1]; + 
} + else + { + ubo->sub_pixel_jitter[0] = 0.f; + ubo->sub_pixel_jitter[1] = 0.f; + } + ubo->first_person_model = cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON; memset(ubo->environment_rotation_matrix, 0, sizeof(ubo->environment_rotation_matrix)); @@ -2598,7 +2617,12 @@ static void drs_process() if (cvar_drs_enable->integer == 0) { num_valid_frames = 0; - drs_effective_scale = 0; + + if (is_accumulation_rendering_active()) + drs_effective_scale = max(100, scr_viewsize->integer); + else + drs_effective_scale = 0; + return; } @@ -2751,15 +2775,7 @@ R_EndFrame_RTX(void) if (frame_ready) { - VkExtent2D extent_render_double; - extent_render_double.width = qvk.extent_render.width * 2; - extent_render_double.height = qvk.extent_render.height * 2; - - if (extents_equal(qvk.extent_render, qvk.extent_unscaled) || - extents_equal(extent_render_double, qvk.extent_unscaled) && drs_current_scale == 0) // don't do nearest filter 2x upscale with DRS enabled - vkpt_final_blit_simple(cmd_buf); - else - vkpt_final_blit_filtered(cmd_buf); + vkpt_final_blit_simple(cmd_buf); frame_ready = qfalse; } @@ -2869,6 +2885,20 @@ vkpt_show_pvs(void) cluster_debug_index = vkpt_refdef.fd->feedback.lookatcluster; } +static float halton(int base, int index) { + float f = 1.f; + float r = 0.f; + int i = index; + + while (i > 0) + { + f = f / base; + r = r + f * (i % base); + i = i / base; + } + return r; +}; + /* called when the library is loaded */ qboolean R_Init_RTX(qboolean total) @@ -2989,6 +3019,12 @@ R_Init_RTX(qboolean total) qvk.sintab[i] = sinf(i * (2 * M_PI / 255)); } + for (int i = 0; i < NUM_TAA_SAMPLES; i++) + { + taa_samples[i][0] = halton(2, i + 1) - 0.5f; + taa_samples[i][1] = halton(3, i + 1) - 0.5f; + } + return qtrue; } diff --git a/src/refresh/vkpt/shader/asvgf_taa.comp b/src/refresh/vkpt/shader/asvgf_taa.comp index 4b1fc5247..9cb58d31a 100644 --- a/src/refresh/vkpt/shader/asvgf_taa.comp +++ b/src/refresh/vkpt/shader/asvgf_taa.comp @@ -89,18 +89,28 @@ void 
get_moments(ivec2 ipos, int r, out vec3 mom1, out vec3 mom2) } } +float get_sample_weight(vec2 delta, float scale) +{ + return clamp(1 - scale * dot(delta, delta), 0, 1); +} + void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - if (ipos.x >= global_ubo.width || ipos.y >= global_ubo.height) + if (ipos.x >= global_ubo.unscaled_width || ipos.y >= global_ubo.unscaled_height) { imageStore(IMG_TAA_OUTPUT, ipos, vec4(0)); return; } - vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, ipos, 0); + // Calculate position in the render buffer (at the lower render resolution) + vec2 nearest_render_pos = vec2(ipos.x + 0.5f, ipos.y + 0.5f) * vec2(global_ubo.width * global_ubo.inv_unscaled_width, global_ubo.height * global_ubo.inv_unscaled_height) - global_ubo.sub_pixel_jitter - vec2(0.5f); + ivec2 int_render_pos = ivec2(round(nearest_render_pos.x), round(nearest_render_pos.y)); + int_render_pos = clamp(int_render_pos, ivec2(0), ivec2(global_ubo.width - 1, global_ubo.height - 1)); + + vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, int_render_pos, 0); vec3 color_center = PQEncode(linear_color_center.rgb); int checkerboard_flags = int(linear_color_center.a); @@ -117,7 +127,7 @@ main() int num_pix; // Obtain the color moments for the surrounding pixels. 
- get_moments(ipos, 1, mom1, mom2); + get_moments(int_render_pos, 1, mom1, mom2); num_pix = 9; // Remove or reduce sparkles by clamping the color of the center pixel to its surroundings @@ -142,7 +152,7 @@ main() const int r = 1; for(int yy = -r; yy <= r; yy++) { for(int xx = -r; xx <= r; xx++) { - ivec2 p = ipos + ivec2(xx, yy); + ivec2 p = int_render_pos + ivec2(xx, yy); vec2 m = texelFetch(TEX_FLAT_MOTION, p, 0).xy; float l = dot(m, m); if(l > len) { @@ -153,11 +163,13 @@ main() } } } + + motion *= vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); - vec2 pos_prev = ((vec2(ipos) + vec2(0.5)) * vec2(global_ubo.inv_width, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width, global_ubo.prev_height); + vec2 pos_prev = vec2(ipos) + vec2(0.5) + motion.xy; if(all(greaterThanEqual(ivec2(pos_prev), ivec2(1))) - && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.width, global_ubo.height) - 1))) + && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - 1))) { // Motion vector was valid - sample the previous frame vec3 color_prev = sample_texture_catmull_rom(TEX_ASVGF_TAA_B, pos_prev).rgb; @@ -180,7 +192,11 @@ main() } // Mix the new color with the clamped previous color - color_output = mix(color_center, color_prev, clamp(global_ubo.flt_taa_history_weight, 0, 0.999)); + float motion_weight = smoothstep(0, 1.0f, sqrt(dot(motion, motion))); + float sample_weight = get_sample_weight(nearest_render_pos - int_render_pos, global_ubo.unscaled_width * global_ubo.inv_width); + float pixel_weight = max(motion_weight, sample_weight) * 0.1f; + pixel_weight = clamp(pixel_weight, 0, 1); + color_output = mix(color_prev, color_center, pixel_weight); } } @@ -203,7 +219,7 @@ main() color_output = PQEncode(linear_color_output); } - bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.width / 2, global_ubo.height / 2))); + bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.unscaled_width / 2, global_ubo.unscaled_height 
/ 2))); if(is_readback_pixel) { readback.hdr_color = linear_color_output; diff --git a/src/refresh/vkpt/shader/bloom_blur.comp b/src/refresh/vkpt/shader/bloom_blur.comp index 8fe041a82..3e9d25a85 100644 --- a/src/refresh/vkpt/shader/bloom_blur.comp +++ b/src/refresh/vkpt/shader/bloom_blur.comp @@ -39,7 +39,7 @@ layout (push_constant) uniform push_constant_block { vec2 img_to_uv(vec2 ipos) { - vec2 uv = ipos / vec2(global_ubo.screen_image_width / 4, global_ubo.screen_image_height / 4); + vec2 uv = ipos / vec2(global_ubo.unscaled_width / 4, global_ubo.unscaled_height / 4); return uv; } @@ -72,7 +72,7 @@ void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - ivec2 bloom_extent = ivec2(global_ubo.width / 4, global_ubo.height / 4); + ivec2 bloom_extent = ivec2(global_ubo.unscaled_width / 4, global_ubo.unscaled_height / 4); if(any(greaterThanEqual(ipos, bloom_extent))) { diff --git a/src/refresh/vkpt/shader/bloom_composite.comp b/src/refresh/vkpt/shader/bloom_composite.comp index e88e2750e..1eaab4622 100644 --- a/src/refresh/vkpt/shader/bloom_composite.comp +++ b/src/refresh/vkpt/shader/bloom_composite.comp @@ -30,7 +30,7 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; vec2 img_to_uv(ivec2 ipos) { - vec2 uv = (vec2(ipos) + vec2(0.5, 0.5)) / vec2(global_ubo.screen_image_width, global_ubo.screen_image_height); + vec2 uv = (vec2(ipos) + vec2(0.5, 0.5)) / vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); uv = clamp(uv, 0, 1); return uv; } @@ -39,7 +39,7 @@ vec2 img_to_uv(ivec2 ipos) void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - if(any(greaterThanEqual(ipos, ivec2(global_ubo.width, global_ubo.height)))) + if(any(greaterThanEqual(ipos, ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height)))) return; vec2 uv = img_to_uv(ipos); diff --git a/src/refresh/vkpt/shader/global_textures.h b/src/refresh/vkpt/shader/global_textures.h index db02213a6..b0032fbea 100644 --- a/src/refresh/vkpt/shader/global_textures.h +++ 
b/src/refresh/vkpt/shader/global_textures.h @@ -25,6 +25,8 @@ with this program; if not, write to the Free Software Foundation, Inc., #define IMG_WIDTH (qvk.extent_screen_images.width) #define IMG_HEIGHT (qvk.extent_screen_images.height) #define IMG_WIDTH_MGPU (qvk.extent_screen_images.width / qvk.device_count) +#define IMG_WIDTH_UNSCALED (qvk.extent_unscaled.width) +#define IMG_HEIGHT_UNSCALED (qvk.extent_unscaled.height) #define IMG_WIDTH_GRAD ((qvk.extent_screen_images.width + GRAD_DWN - 1) / GRAD_DWN) #define IMG_HEIGHT_GRAD ((qvk.extent_screen_images.height + GRAD_DWN - 1) / GRAD_DWN) @@ -61,10 +63,10 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(FLAT_COLOR, 26, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(FLAT_MOTION, 27, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(PT_GODRAYS_THROUGHPUT_DIST,28, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(BLOOM_DOWNSCALE_MIP_1, 29, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH / 2, IMG_HEIGHT / 2 ) \ - IMG_DO(BLOOM_HBLUR, 30, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH / 4, IMG_HEIGHT / 4 ) \ - IMG_DO(BLOOM_VBLUR, 31, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH / 4, IMG_HEIGHT / 4 ) \ - IMG_DO(TAA_OUTPUT, 32, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(BLOOM_DOWNSCALE_MIP_1, 29, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED / 2, IMG_HEIGHT_UNSCALED / 2 ) \ + IMG_DO(BLOOM_HBLUR, 30, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED / 4, IMG_HEIGHT_UNSCALED / 4 ) \ + IMG_DO(BLOOM_VBLUR, 31, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED / 4, IMG_HEIGHT_UNSCALED / 4 ) \ + IMG_DO(TAA_OUTPUT, 32, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ IMG_DO(PT_VIEW_DIRECTION, 33, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_VIEW_DIRECTION2, 34, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_THROUGHPUT, 35, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, 
IMG_HEIGHT ) \ @@ -87,8 +89,8 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(ASVGF_FILTERED_SPEC_B, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ + IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ IMG_DO(ASVGF_RNG_SEED_A, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_B, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ @@ -107,8 +109,8 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(ASVGF_FILTERED_SPEC_A, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ + IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ IMG_DO(ASVGF_RNG_SEED_B, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_A, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ diff --git 
a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 259b49818..099409f53 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -173,13 +173,17 @@ with this program; if not, write to the Free Software Foundation, Inc., \ GLOBAL_UBO_VAR_LIST_DO(int, prev_width) \ GLOBAL_UBO_VAR_LIST_DO(int, prev_height)\ - GLOBAL_UBO_VAR_LIST_DO(float, inv_width)\ - GLOBAL_UBO_VAR_LIST_DO(float, inv_height)\ + GLOBAL_UBO_VAR_LIST_DO(float, inv_width) \ + GLOBAL_UBO_VAR_LIST_DO(float, inv_height) \ \ + GLOBAL_UBO_VAR_LIST_DO(float, unscaled_width) \ + GLOBAL_UBO_VAR_LIST_DO(float, unscaled_height) \ + GLOBAL_UBO_VAR_LIST_DO(float, inv_unscaled_width) \ + GLOBAL_UBO_VAR_LIST_DO(float, inv_unscaled_height) \ + \ + GLOBAL_UBO_VAR_LIST_DO(vec2, sub_pixel_jitter) \ GLOBAL_UBO_VAR_LIST_DO(float, prev_adapted_luminance) \ GLOBAL_UBO_VAR_LIST_DO(float, padding1) \ - GLOBAL_UBO_VAR_LIST_DO(float, padding2) \ - GLOBAL_UBO_VAR_LIST_DO(float, padding3) \ \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_center) \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_size) \ diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index a7897556f..373ae2fbb 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -108,17 +108,18 @@ main() vec3 direction; vec2 pixel_offset; - - if(global_ubo.flt_taa != 0 || global_ubo.temporal_blend_factor > 0) - { + if(global_ubo.temporal_blend_factor > 0) + { + // Photo mode - use higher quality sampling pixel_offset = vec2(get_rng(RNG_PRIMARY_OFF_X), get_rng(RNG_PRIMARY_OFF_Y)); pixel_offset -= vec2(0.5); } - else + else { - pixel_offset = vec2(0); + // Real-time mode - use predictable sampling for TAAU + pixel_offset = global_ubo.sub_pixel_jitter; } - + const ivec2 image_position = get_image_position(); const vec2 pixel_center = vec2(image_position) + vec2(0.5); const vec2 inUV = (pixel_center + pixel_offset) / 
vec2(get_image_size()); diff --git a/src/refresh/vkpt/shader/tone_mapping_apply.comp b/src/refresh/vkpt/shader/tone_mapping_apply.comp index 00f26ce59..70e720354 100644 --- a/src/refresh/vkpt/shader/tone_mapping_apply.comp +++ b/src/refresh/vkpt/shader/tone_mapping_apply.comp @@ -121,7 +121,7 @@ vec3 srgb_dither(vec3 color, ivec2 ipos) void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - if(any(greaterThanEqual(ipos, ivec2(global_ubo.width, global_ubo.height)))) + if(any(greaterThanEqual(ipos, ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height)))) return; // Get input color and luminance. diff --git a/src/refresh/vkpt/shader/tone_mapping_histogram.comp b/src/refresh/vkpt/shader/tone_mapping_histogram.comp index 00a22266f..28a376936 100644 --- a/src/refresh/vkpt/shader/tone_mapping_histogram.comp +++ b/src/refresh/vkpt/shader/tone_mapping_histogram.comp @@ -172,7 +172,7 @@ shared uint s_Histogram[HISTOGRAM_BINS]; void main() { const ivec2 ipos = ivec2(gl_GlobalInvocationID); - if(any(greaterThanEqual(ipos, ivec2(global_ubo.width, global_ubo.height)))) + if(any(greaterThanEqual(ipos, ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height)))) return; vec3 input_color = imageLoad(IMG_TAA_OUTPUT, ipos).rgb; @@ -207,7 +207,7 @@ void main() const uint right_bin = left_bin + 1; // Compute pixel importance based on distance to the center of the screen. 
- const vec2 screenSize = vec2(global_ubo.width, global_ubo.height); + const vec2 screenSize = vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); float weight = clamp(1.0 - length(vec2(ipos) / screenSize - vec2(0.5)) * 1.5, 0.01, 1.0); // Spatial weight const float right_weight_F = fract(histogram_bin) * weight; diff --git a/src/refresh/vkpt/tone_mapping.c b/src/refresh/vkpt/tone_mapping.c index f873ee005..64440432a 100644 --- a/src/refresh/vkpt/tone_mapping.c +++ b/src/refresh/vkpt/tone_mapping.c @@ -260,8 +260,8 @@ vkpt_tone_mapping_record_cmd_buffer(VkCommandBuffer cmd_buf, float frame_time) pipeline_layout_tone_mapping_histogram, 0, LENGTH(desc_sets), desc_sets, 0, 0); vkCmdDispatch(cmd_buf, - (qvk.extent_render.width + 15) / 16, - (qvk.extent_render.height + 15) / 16, + (qvk.extent_unscaled.width + 15) / 16, + (qvk.extent_unscaled.height + 15) / 16, 1); BUFFER_BARRIER(cmd_buf, @@ -385,8 +385,8 @@ vkpt_tone_mapping_record_cmd_buffer(VkCommandBuffer cmd_buf, float frame_time) VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_tm2_apply), push_constants_tm2_apply); vkCmdDispatch(cmd_buf, - (qvk.extent_render.width + 15) / 16, - (qvk.extent_render.height + 15) / 16, + (qvk.extent_unscaled.width + 15) / 16, + (qvk.extent_unscaled.height + 15) / 16, 1); // Because VKPT_IMG_TAA_OUTPUT changed, we make sure to wait for the image From c0ec9db5884ef03dcea6a579c4d98751aa4943c5 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 3 Mar 2020 11:29:12 -0800 Subject: [PATCH 02/48] Removed the Lanczos filter pass. 
--- src/CMakeLists.txt | 2 - src/refresh/vkpt/draw.c | 49 -------- src/refresh/vkpt/shader/final_blit.vert | 47 ------- .../vkpt/shader/final_blit_lanczos.frag | 116 ------------------ src/refresh/vkpt/vkpt.h | 2 - 5 files changed, 216 deletions(-) delete mode 100644 src/refresh/vkpt/shader/final_blit.vert delete mode 100644 src/refresh/vkpt/shader/final_blit_lanczos.frag diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eb2fac6ac..2f8edfb47 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -327,8 +327,6 @@ set(SRC_SHADERS refresh/vkpt/shader/sky_buffer_resolve.comp refresh/vkpt/shader/stretch_pic.frag refresh/vkpt/shader/stretch_pic.vert - refresh/vkpt/shader/final_blit_lanczos.frag - refresh/vkpt/shader/final_blit.vert ) include(../cmake/compileShaders.cmake) diff --git a/src/refresh/vkpt/draw.c b/src/refresh/vkpt/draw.c index a99f2c54b..775fec497 100644 --- a/src/refresh/vkpt/draw.c +++ b/src/refresh/vkpt/draw.c @@ -48,10 +48,8 @@ static qboolean clip_enable = qfalse; static StretchPic_t stretch_pic_queue[MAX_STRETCH_PICS]; static VkPipelineLayout pipeline_layout_stretch_pic; -static VkPipelineLayout pipeline_layout_final_blit; static VkRenderPass render_pass_stretch_pic; static VkPipeline pipeline_stretch_pic; -static VkPipeline pipeline_final_blit; static VkFramebuffer framebuffer_stretch_pic[MAX_FRAMES_IN_FLIGHT]; static BufferResource_t buf_stretch_pic_queue[MAX_FRAMES_IN_FLIGHT]; static VkDescriptorSetLayout desc_set_layout_sbo; @@ -310,9 +308,7 @@ vkpt_draw_destroy_pipelines() { LOG_FUNC(); vkDestroyPipeline(qvk.device, pipeline_stretch_pic, NULL); - vkDestroyPipeline(qvk.device, pipeline_final_blit, NULL); vkDestroyPipelineLayout(qvk.device, pipeline_layout_stretch_pic, NULL); - vkDestroyPipelineLayout(qvk.device, pipeline_layout_final_blit, NULL); for(int i = 0; i < qvk.num_swap_chain_images; i++) { vkDestroyFramebuffer(qvk.device, framebuffer_stretch_pic[i], NULL); } @@ -333,13 +329,6 @@ vkpt_draw_create_pipelines() .pSetLayouts = 
desc_set_layouts ); - desc_set_layouts[0] = qvk.desc_set_layout_ubo; - - CREATE_PIPELINE_LAYOUT(qvk.device, &pipeline_layout_final_blit, - .setLayoutCount = LENGTH(desc_set_layouts), - .pSetLayouts = desc_set_layouts - ); - VkPipelineShaderStageCreateInfo shader_info[] = { SHADER_STAGE(QVK_MOD_STRETCH_PIC_VERT, VK_SHADER_STAGE_VERTEX_BIT), SHADER_STAGE(QVK_MOD_STRETCH_PIC_FRAG, VK_SHADER_STAGE_FRAGMENT_BIT) @@ -454,18 +443,6 @@ vkpt_draw_create_pipelines() ATTACH_LABEL_VARIABLE(pipeline_stretch_pic, PIPELINE); - VkPipelineShaderStageCreateInfo shader_info_final_blit[] = { - SHADER_STAGE(QVK_MOD_FINAL_BLIT_VERT, VK_SHADER_STAGE_VERTEX_BIT), - SHADER_STAGE(QVK_MOD_FINAL_BLIT_LANCZOS_FRAG, VK_SHADER_STAGE_FRAGMENT_BIT) - }; - - pipeline_info.pStages = shader_info_final_blit; - pipeline_info.layout = pipeline_layout_final_blit; - - _VK(vkCreateGraphicsPipelines(qvk.device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, &pipeline_final_blit)); - ATTACH_LABEL_VARIABLE(pipeline_final_blit, PIPELINE); - - for(int i = 0; i < qvk.num_swap_chain_images; i++) { VkImageView attachments[] = { qvk.swap_chain_image_views[i] @@ -600,32 +577,6 @@ vkpt_final_blit_simple(VkCommandBuffer cmd_buf) return VK_SUCCESS; } -VkResult -vkpt_final_blit_filtered(VkCommandBuffer cmd_buf) -{ - VkRenderPassBeginInfo render_pass_info = { - .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, - .renderPass = render_pass_stretch_pic, - .framebuffer = framebuffer_stretch_pic[qvk.current_swap_chain_image_index], - .renderArea.offset = { 0, 0 }, - .renderArea.extent = vkpt_draw_get_extent() - }; - - VkDescriptorSet desc_sets[] = { - qvk.desc_set_ubo, - qvk_get_current_desc_set_textures() - }; - - vkCmdBeginRenderPass(cmd_buf, &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout_final_blit, 0, LENGTH(desc_sets), desc_sets, 0, 0); - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_final_blit); - vkCmdDraw(cmd_buf, 
4, 1, 0, 0); - vkCmdEndRenderPass(cmd_buf); - - return VK_SUCCESS; -} - void R_SetClipRect_RTX(const clipRect_t *clip) { if (clip) diff --git a/src/refresh/vkpt/shader/final_blit.vert b/src/refresh/vkpt/shader/final_blit.vert deleted file mode 100644 index 50e7c5960..000000000 --- a/src/refresh/vkpt/shader/final_blit.vert +++ /dev/null @@ -1,47 +0,0 @@ -/* -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -// ========================================================================== // -// Vertex shader that produces a full screen quad for the final blit. 
-// ========================================================================== // - -#version 450 -#extension GL_ARB_separate_shader_objects : enable - -out gl_PerVertex { - vec4 gl_Position; -}; - -layout(location = 0) out vec2 tex_coord; - -vec2 positions[4] = vec2[]( - vec2(0.0, 1.0), - vec2(0.0, 0.0), - vec2(1.0, 1.0), - vec2(1.0, 0.0) -); - -void -main() -{ - vec2 pos = positions[gl_VertexIndex] * 2 + vec2(-1); - tex_coord = positions[gl_VertexIndex]; - - gl_Position = vec4(pos, 0.0, 1.0); -} - diff --git a/src/refresh/vkpt/shader/final_blit_lanczos.frag b/src/refresh/vkpt/shader/final_blit_lanczos.frag deleted file mode 100644 index 1254d8101..000000000 --- a/src/refresh/vkpt/shader/final_blit_lanczos.frag +++ /dev/null @@ -1,116 +0,0 @@ -/* -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -// ========================================================================== // -// Pixel shader that samples the input texture using a Lanczos filter. -// Applied for the final blit when the resolution scale is not 100%. 
-// ========================================================================== // - -#version 450 -#extension GL_GOOGLE_include_directive : enable -#extension GL_ARB_separate_shader_objects : enable -#extension GL_EXT_nonuniform_qualifier : enable - -#include "utils.glsl" - -#define GLOBAL_UBO_DESC_SET_IDX 0 -#include "global_ubo.h" - -#define GLOBAL_TEXTURES_DESC_SET_IDX 1 -#include "global_textures.h" - -layout(location = 0) in vec2 tex_coord; -layout(location = 0) out vec4 outColor; - -// semi-vector form of the ternary operator: (f == val) ? eq : neq -vec2 v_sel(vec2 f, float val, float eq, vec2 neq) -{ - vec2 result; - result.x = (f.x == val) ? eq : neq.x; - result.y = (f.y == val) ? eq : neq.y; - return result; -} - -vec3 filter_lanczos(sampler2D img, vec2 uv) -{ - ivec2 size = textureSize(img, 0); - - // Lanczos 3 - vec2 UV = uv.xy * size; - vec2 tc = floor(UV - 0.5) + 0.5; - vec2 f = UV - tc + 2; - - // compute at f, f-1, f-2, f-3, f-4, and f-5 using trig angle addition - vec2 fpi = f * M_PI, fpi3 = f * (M_PI / 3.0); - vec2 sinfpi = sin(fpi), sinfpi3 = sin(fpi3), cosfpi3 = cos(fpi3); - const float r3 = sqrt(3.0); - vec2 w0 = v_sel(f, 0, M_PI * M_PI * 1.0 / 3.0, (sinfpi * sinfpi3) / (f * f)); - vec2 w1 = v_sel(f, 1, M_PI * M_PI * 2.0 / 3.0, (-sinfpi * (sinfpi3 - r3 * cosfpi3)) / ((f - 1.0)*(f - 1.0))); - vec2 w2 = v_sel(f, 2, M_PI * M_PI * 2.0 / 3.0, (sinfpi * (-sinfpi3 - r3 * cosfpi3)) / ((f - 2.0)*(f - 2.0))); - vec2 w3 = v_sel(f, 3, M_PI * M_PI * 2.0 / 3.0, (-sinfpi * (-2.0*sinfpi3)) / ((f - 3.0)*(f - 3.0))); - vec2 w4 = v_sel(f, 4, M_PI * M_PI * 2.0 / 3.0, (sinfpi * (-sinfpi3 + r3 * cosfpi3)) / ((f - 4.0)*(f - 4.0))); - vec2 w5 = v_sel(f, 5, M_PI * M_PI * 2.0 / 3.0, (-sinfpi * (sinfpi3 + r3 * cosfpi3)) / ((f - 5.0)*(f - 5.0))); - - // use bilinear texture weights to merge center two samples in each dimension - vec2 Weight[5]; - Weight[0] = w0; - Weight[1] = w1; - Weight[2] = w2 + w3; - Weight[3] = w4; - Weight[4] = w5; - - vec2 invTextureSize = 1.0 / 
vec2(size); - - vec2 Sample[5]; - Sample[0] = invTextureSize * (tc - 2); - Sample[1] = invTextureSize * (tc - 1); - Sample[2] = invTextureSize * (tc + w3 / Weight[2]); - Sample[3] = invTextureSize * (tc + 2); - Sample[4] = invTextureSize * (tc + 3); - - vec4 o_rgba = vec4(0); - - // 5x5 footprint with corners dropped to give 13 texture taps - o_rgba += vec4(textureLod(img, vec2(Sample[0].x, Sample[2].y), 0).rgb, 1.0) * Weight[0].x * Weight[2].y; - o_rgba += vec4(textureLod(img, vec2(Sample[1].x, Sample[1].y), 0).rgb, 1.0) * Weight[1].x * Weight[1].y; - o_rgba += vec4(textureLod(img, vec2(Sample[1].x, Sample[2].y), 0).rgb, 1.0) * Weight[1].x * Weight[2].y; - o_rgba += vec4(textureLod(img, vec2(Sample[1].x, Sample[3].y), 0).rgb, 1.0) * Weight[1].x * Weight[3].y; - o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[0].y), 0).rgb, 1.0) * Weight[2].x * Weight[0].y; - o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[1].y), 0).rgb, 1.0) * Weight[2].x * Weight[1].y; - o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[2].y), 0).rgb, 1.0) * Weight[2].x * Weight[2].y; - o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[3].y), 0).rgb, 1.0) * Weight[2].x * Weight[3].y; - o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[4].y), 0).rgb, 1.0) * Weight[2].x * Weight[4].y; - o_rgba += vec4(textureLod(img, vec2(Sample[3].x, Sample[1].y), 0).rgb, 1.0) * Weight[3].x * Weight[1].y; - o_rgba += vec4(textureLod(img, vec2(Sample[3].x, Sample[2].y), 0).rgb, 1.0) * Weight[3].x * Weight[2].y; - o_rgba += vec4(textureLod(img, vec2(Sample[3].x, Sample[3].y), 0).rgb, 1.0) * Weight[3].x * Weight[3].y; - o_rgba += vec4(textureLod(img, vec2(Sample[4].x, Sample[2].y), 0).rgb, 1.0) * Weight[4].x * Weight[2].y; - - return o_rgba.rgb / o_rgba.w; -} - -void -main() -{ - vec3 color; - - vec2 uv = tex_coord * vec2(global_ubo.width, global_ubo.height) / vec2(global_ubo.screen_image_width, global_ubo.screen_image_height); - - color = filter_lanczos(TEX_TAA_OUTPUT, uv); - - 
outColor = vec4(color, 1); -} diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index ad8355059..1d66f9ca9 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -70,8 +70,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #define LIST_SHADER_MODULES \ SHADER_MODULE_DO(QVK_MOD_STRETCH_PIC_VERT) \ SHADER_MODULE_DO(QVK_MOD_STRETCH_PIC_FRAG) \ - SHADER_MODULE_DO(QVK_MOD_FINAL_BLIT_VERT) \ - SHADER_MODULE_DO(QVK_MOD_FINAL_BLIT_LANCZOS_FRAG) \ SHADER_MODULE_DO(QVK_MOD_PRIMARY_RAYS_RGEN) \ SHADER_MODULE_DO(QVK_MOD_REFLECT_REFRACT_RGEN) \ SHADER_MODULE_DO(QVK_MOD_DIRECT_LIGHTING_RGEN) \ From e6f386fa1cac9813649ff1b493ed232a86978c26 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 4 Mar 2020 15:33:30 -0800 Subject: [PATCH 03/48] Added automatic adjustment of texture LOD bias to make the post-TAAU image sharper, and set the default TAA variance scale to 1.0, which is more TAAU friendly. --- src/refresh/vkpt/main.c | 8 ++++++++ src/refresh/vkpt/shader/global_ubo.h | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index a8731a190..6de9e1cd4 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -2159,6 +2159,14 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->pt_ndf_trim = 1.f; } } + else + { + // adjust texture LOD bias to the resolution scale, i.e. use negative bias if scale is < 100 + float resolution_scale = (drs_effective_scale != 0) ? 
(float)drs_effective_scale : (float)scr_viewsize->integer; + resolution_scale *= 0.01f; + resolution_scale = clamp(resolution_scale, 0.1f, 1.f); + ubo->pt_texture_lod_bias = cvar_pt_texture_lod_bias->value + log2f(resolution_scale); + } { // figure out if DoF should be enabled in the current rendering mode diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 099409f53..0ec327eaf 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -60,7 +60,7 @@ with this program; if not, write to the Free Software Foundation, Inc., UBO_CVAR_DO(flt_show_gradients, 0) /* switch for showing the gradient values as overlay image, 0 or 1 */ \ UBO_CVAR_DO(flt_taa, 1) /* switch for temporal AA, 0 or 1 */ \ UBO_CVAR_DO(flt_taa_anti_sparkle, 0.25) /* strength of the anti-sparkle filter of TAA, [0..1] */ \ - UBO_CVAR_DO(flt_taa_variance, 0.7) /* temporal AA variance window scale, 0 means disable NCC, [0..inf) */ \ + UBO_CVAR_DO(flt_taa_variance, 1.0) /* temporal AA variance window scale, 0 means disable NCC, [0..inf) */ \ UBO_CVAR_DO(flt_taa_history_weight, 0.95) /* temporal AA weight of the history sample, [0..1) */ \ UBO_CVAR_DO(flt_temporal_hf, 1) /* temporal filter strength, [0..1] */ \ UBO_CVAR_DO(flt_temporal_lf, 1) \ From d6f761ca7b618eac1972a8304999826fbab40c1c Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 04:21:03 -0800 Subject: [PATCH 04/48] Revert "Removed the Lanczos filter pass." This reverts commit c0ec9db5884ef03dcea6a579c4d98751aa4943c5. 
--- src/CMakeLists.txt | 2 + src/refresh/vkpt/draw.c | 49 ++++++++ src/refresh/vkpt/shader/final_blit.vert | 47 +++++++ .../vkpt/shader/final_blit_lanczos.frag | 116 ++++++++++++++++++ src/refresh/vkpt/vkpt.h | 2 + 5 files changed, 216 insertions(+) create mode 100644 src/refresh/vkpt/shader/final_blit.vert create mode 100644 src/refresh/vkpt/shader/final_blit_lanczos.frag diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2f8edfb47..eb2fac6ac 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -327,6 +327,8 @@ set(SRC_SHADERS refresh/vkpt/shader/sky_buffer_resolve.comp refresh/vkpt/shader/stretch_pic.frag refresh/vkpt/shader/stretch_pic.vert + refresh/vkpt/shader/final_blit_lanczos.frag + refresh/vkpt/shader/final_blit.vert ) include(../cmake/compileShaders.cmake) diff --git a/src/refresh/vkpt/draw.c b/src/refresh/vkpt/draw.c index 775fec497..a99f2c54b 100644 --- a/src/refresh/vkpt/draw.c +++ b/src/refresh/vkpt/draw.c @@ -48,8 +48,10 @@ static qboolean clip_enable = qfalse; static StretchPic_t stretch_pic_queue[MAX_STRETCH_PICS]; static VkPipelineLayout pipeline_layout_stretch_pic; +static VkPipelineLayout pipeline_layout_final_blit; static VkRenderPass render_pass_stretch_pic; static VkPipeline pipeline_stretch_pic; +static VkPipeline pipeline_final_blit; static VkFramebuffer framebuffer_stretch_pic[MAX_FRAMES_IN_FLIGHT]; static BufferResource_t buf_stretch_pic_queue[MAX_FRAMES_IN_FLIGHT]; static VkDescriptorSetLayout desc_set_layout_sbo; @@ -308,7 +310,9 @@ vkpt_draw_destroy_pipelines() { LOG_FUNC(); vkDestroyPipeline(qvk.device, pipeline_stretch_pic, NULL); + vkDestroyPipeline(qvk.device, pipeline_final_blit, NULL); vkDestroyPipelineLayout(qvk.device, pipeline_layout_stretch_pic, NULL); + vkDestroyPipelineLayout(qvk.device, pipeline_layout_final_blit, NULL); for(int i = 0; i < qvk.num_swap_chain_images; i++) { vkDestroyFramebuffer(qvk.device, framebuffer_stretch_pic[i], NULL); } @@ -329,6 +333,13 @@ vkpt_draw_create_pipelines() .pSetLayouts = 
desc_set_layouts ); + desc_set_layouts[0] = qvk.desc_set_layout_ubo; + + CREATE_PIPELINE_LAYOUT(qvk.device, &pipeline_layout_final_blit, + .setLayoutCount = LENGTH(desc_set_layouts), + .pSetLayouts = desc_set_layouts + ); + VkPipelineShaderStageCreateInfo shader_info[] = { SHADER_STAGE(QVK_MOD_STRETCH_PIC_VERT, VK_SHADER_STAGE_VERTEX_BIT), SHADER_STAGE(QVK_MOD_STRETCH_PIC_FRAG, VK_SHADER_STAGE_FRAGMENT_BIT) @@ -443,6 +454,18 @@ vkpt_draw_create_pipelines() ATTACH_LABEL_VARIABLE(pipeline_stretch_pic, PIPELINE); + VkPipelineShaderStageCreateInfo shader_info_final_blit[] = { + SHADER_STAGE(QVK_MOD_FINAL_BLIT_VERT, VK_SHADER_STAGE_VERTEX_BIT), + SHADER_STAGE(QVK_MOD_FINAL_BLIT_LANCZOS_FRAG, VK_SHADER_STAGE_FRAGMENT_BIT) + }; + + pipeline_info.pStages = shader_info_final_blit; + pipeline_info.layout = pipeline_layout_final_blit; + + _VK(vkCreateGraphicsPipelines(qvk.device, VK_NULL_HANDLE, 1, &pipeline_info, NULL, &pipeline_final_blit)); + ATTACH_LABEL_VARIABLE(pipeline_final_blit, PIPELINE); + + for(int i = 0; i < qvk.num_swap_chain_images; i++) { VkImageView attachments[] = { qvk.swap_chain_image_views[i] @@ -577,6 +600,32 @@ vkpt_final_blit_simple(VkCommandBuffer cmd_buf) return VK_SUCCESS; } +VkResult +vkpt_final_blit_filtered(VkCommandBuffer cmd_buf) +{ + VkRenderPassBeginInfo render_pass_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = render_pass_stretch_pic, + .framebuffer = framebuffer_stretch_pic[qvk.current_swap_chain_image_index], + .renderArea.offset = { 0, 0 }, + .renderArea.extent = vkpt_draw_get_extent() + }; + + VkDescriptorSet desc_sets[] = { + qvk.desc_set_ubo, + qvk_get_current_desc_set_textures() + }; + + vkCmdBeginRenderPass(cmd_buf, &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, + pipeline_layout_final_blit, 0, LENGTH(desc_sets), desc_sets, 0, 0); + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_final_blit); + vkCmdDraw(cmd_buf, 
4, 1, 0, 0); + vkCmdEndRenderPass(cmd_buf); + + return VK_SUCCESS; +} + void R_SetClipRect_RTX(const clipRect_t *clip) { if (clip) diff --git a/src/refresh/vkpt/shader/final_blit.vert b/src/refresh/vkpt/shader/final_blit.vert new file mode 100644 index 000000000..50e7c5960 --- /dev/null +++ b/src/refresh/vkpt/shader/final_blit.vert @@ -0,0 +1,47 @@ +/* +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +// ========================================================================== // +// Vertex shader that produces a full screen quad for the final blit. 
+// ========================================================================== // + +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +out gl_PerVertex { + vec4 gl_Position; +}; + +layout(location = 0) out vec2 tex_coord; + +vec2 positions[4] = vec2[]( + vec2(0.0, 1.0), + vec2(0.0, 0.0), + vec2(1.0, 1.0), + vec2(1.0, 0.0) +); + +void +main() +{ + vec2 pos = positions[gl_VertexIndex] * 2 + vec2(-1); + tex_coord = positions[gl_VertexIndex]; + + gl_Position = vec4(pos, 0.0, 1.0); +} + diff --git a/src/refresh/vkpt/shader/final_blit_lanczos.frag b/src/refresh/vkpt/shader/final_blit_lanczos.frag new file mode 100644 index 000000000..1254d8101 --- /dev/null +++ b/src/refresh/vkpt/shader/final_blit_lanczos.frag @@ -0,0 +1,116 @@ +/* +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +// ========================================================================== // +// Pixel shader that samples the input texture using a Lanczos filter. +// Applied for the final blit when the resolution scale is not 100%. 
+// ========================================================================== // + +#version 450 +#extension GL_GOOGLE_include_directive : enable +#extension GL_ARB_separate_shader_objects : enable +#extension GL_EXT_nonuniform_qualifier : enable + +#include "utils.glsl" + +#define GLOBAL_UBO_DESC_SET_IDX 0 +#include "global_ubo.h" + +#define GLOBAL_TEXTURES_DESC_SET_IDX 1 +#include "global_textures.h" + +layout(location = 0) in vec2 tex_coord; +layout(location = 0) out vec4 outColor; + +// semi-vector form of the ternary operator: (f == val) ? eq : neq +vec2 v_sel(vec2 f, float val, float eq, vec2 neq) +{ + vec2 result; + result.x = (f.x == val) ? eq : neq.x; + result.y = (f.y == val) ? eq : neq.y; + return result; +} + +vec3 filter_lanczos(sampler2D img, vec2 uv) +{ + ivec2 size = textureSize(img, 0); + + // Lanczos 3 + vec2 UV = uv.xy * size; + vec2 tc = floor(UV - 0.5) + 0.5; + vec2 f = UV - tc + 2; + + // compute at f, f-1, f-2, f-3, f-4, and f-5 using trig angle addition + vec2 fpi = f * M_PI, fpi3 = f * (M_PI / 3.0); + vec2 sinfpi = sin(fpi), sinfpi3 = sin(fpi3), cosfpi3 = cos(fpi3); + const float r3 = sqrt(3.0); + vec2 w0 = v_sel(f, 0, M_PI * M_PI * 1.0 / 3.0, (sinfpi * sinfpi3) / (f * f)); + vec2 w1 = v_sel(f, 1, M_PI * M_PI * 2.0 / 3.0, (-sinfpi * (sinfpi3 - r3 * cosfpi3)) / ((f - 1.0)*(f - 1.0))); + vec2 w2 = v_sel(f, 2, M_PI * M_PI * 2.0 / 3.0, (sinfpi * (-sinfpi3 - r3 * cosfpi3)) / ((f - 2.0)*(f - 2.0))); + vec2 w3 = v_sel(f, 3, M_PI * M_PI * 2.0 / 3.0, (-sinfpi * (-2.0*sinfpi3)) / ((f - 3.0)*(f - 3.0))); + vec2 w4 = v_sel(f, 4, M_PI * M_PI * 2.0 / 3.0, (sinfpi * (-sinfpi3 + r3 * cosfpi3)) / ((f - 4.0)*(f - 4.0))); + vec2 w5 = v_sel(f, 5, M_PI * M_PI * 2.0 / 3.0, (-sinfpi * (sinfpi3 + r3 * cosfpi3)) / ((f - 5.0)*(f - 5.0))); + + // use bilinear texture weights to merge center two samples in each dimension + vec2 Weight[5]; + Weight[0] = w0; + Weight[1] = w1; + Weight[2] = w2 + w3; + Weight[3] = w4; + Weight[4] = w5; + + vec2 invTextureSize = 1.0 / 
vec2(size); + + vec2 Sample[5]; + Sample[0] = invTextureSize * (tc - 2); + Sample[1] = invTextureSize * (tc - 1); + Sample[2] = invTextureSize * (tc + w3 / Weight[2]); + Sample[3] = invTextureSize * (tc + 2); + Sample[4] = invTextureSize * (tc + 3); + + vec4 o_rgba = vec4(0); + + // 5x5 footprint with corners dropped to give 13 texture taps + o_rgba += vec4(textureLod(img, vec2(Sample[0].x, Sample[2].y), 0).rgb, 1.0) * Weight[0].x * Weight[2].y; + o_rgba += vec4(textureLod(img, vec2(Sample[1].x, Sample[1].y), 0).rgb, 1.0) * Weight[1].x * Weight[1].y; + o_rgba += vec4(textureLod(img, vec2(Sample[1].x, Sample[2].y), 0).rgb, 1.0) * Weight[1].x * Weight[2].y; + o_rgba += vec4(textureLod(img, vec2(Sample[1].x, Sample[3].y), 0).rgb, 1.0) * Weight[1].x * Weight[3].y; + o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[0].y), 0).rgb, 1.0) * Weight[2].x * Weight[0].y; + o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[1].y), 0).rgb, 1.0) * Weight[2].x * Weight[1].y; + o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[2].y), 0).rgb, 1.0) * Weight[2].x * Weight[2].y; + o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[3].y), 0).rgb, 1.0) * Weight[2].x * Weight[3].y; + o_rgba += vec4(textureLod(img, vec2(Sample[2].x, Sample[4].y), 0).rgb, 1.0) * Weight[2].x * Weight[4].y; + o_rgba += vec4(textureLod(img, vec2(Sample[3].x, Sample[1].y), 0).rgb, 1.0) * Weight[3].x * Weight[1].y; + o_rgba += vec4(textureLod(img, vec2(Sample[3].x, Sample[2].y), 0).rgb, 1.0) * Weight[3].x * Weight[2].y; + o_rgba += vec4(textureLod(img, vec2(Sample[3].x, Sample[3].y), 0).rgb, 1.0) * Weight[3].x * Weight[3].y; + o_rgba += vec4(textureLod(img, vec2(Sample[4].x, Sample[2].y), 0).rgb, 1.0) * Weight[4].x * Weight[2].y; + + return o_rgba.rgb / o_rgba.w; +} + +void +main() +{ + vec3 color; + + vec2 uv = tex_coord * vec2(global_ubo.width, global_ubo.height) / vec2(global_ubo.screen_image_width, global_ubo.screen_image_height); + + color = filter_lanczos(TEX_TAA_OUTPUT, uv); + + 
outColor = vec4(color, 1); +} diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 1d66f9ca9..ad8355059 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -70,6 +70,8 @@ with this program; if not, write to the Free Software Foundation, Inc., #define LIST_SHADER_MODULES \ SHADER_MODULE_DO(QVK_MOD_STRETCH_PIC_VERT) \ SHADER_MODULE_DO(QVK_MOD_STRETCH_PIC_FRAG) \ + SHADER_MODULE_DO(QVK_MOD_FINAL_BLIT_VERT) \ + SHADER_MODULE_DO(QVK_MOD_FINAL_BLIT_LANCZOS_FRAG) \ SHADER_MODULE_DO(QVK_MOD_PRIMARY_RAYS_RGEN) \ SHADER_MODULE_DO(QVK_MOD_REFLECT_REFRACT_RGEN) \ SHADER_MODULE_DO(QVK_MOD_DIRECT_LIGHTING_RGEN) \ From 750bbd6a5251bf1cebc2f3cd1ad1bdca197f9e5e Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 05:35:38 -0800 Subject: [PATCH 05/48] Added runtime toggle between TAA and TAAU (flt_taa = 2). --- src/CMakeLists.txt | 1 + src/refresh/vkpt/asvgf.c | 28 ++- src/refresh/vkpt/bloom.c | 3 +- src/refresh/vkpt/draw.c | 8 +- src/refresh/vkpt/main.c | 23 +- src/refresh/vkpt/shader/asvgf_taa.comp | 34 +-- src/refresh/vkpt/shader/asvgf_taau.comp | 230 ++++++++++++++++++ src/refresh/vkpt/shader/primary_rays.rgen | 4 +- .../vkpt/shader/tone_mapping_apply.comp | 6 +- .../vkpt/shader/tone_mapping_histogram.comp | 9 +- src/refresh/vkpt/vkpt.h | 1 + 11 files changed, 309 insertions(+), 38 deletions(-) create mode 100644 src/refresh/vkpt/shader/asvgf_taau.comp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index eb2fac6ac..118a37d4f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -315,6 +315,7 @@ set(SRC_SHADERS refresh/vkpt/shader/asvgf_lf.comp refresh/vkpt/shader/asvgf_seed_rng.comp refresh/vkpt/shader/asvgf_taa.comp + refresh/vkpt/shader/asvgf_taau.comp refresh/vkpt/shader/asvgf_temporal.comp refresh/vkpt/shader/instance_geometry.comp refresh/vkpt/shader/tone_mapping_histogram.comp diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index 1754d619b..1b6a162d4 100644 --- a/src/refresh/vkpt/asvgf.c +++ 
b/src/refresh/vkpt/asvgf.c @@ -31,6 +31,7 @@ enum { ATROUS_ITER_2, ATROUS_ITER_3, TAA, + TAAU, CHECKERBOARD_INTERLEAVE, COMPOSITING, ASVGF_NUM_PIPELINES @@ -41,6 +42,8 @@ static VkPipelineLayout pipeline_layout_atrous; static VkPipelineLayout pipeline_layout_general; static VkPipelineLayout pipeline_layout_taa; +extern cvar_t* cvar_flt_taa; + VkResult vkpt_asvgf_initialize() { @@ -169,6 +172,11 @@ vkpt_asvgf_create_pipelines() .stage = SHADER_STAGE(QVK_MOD_ASVGF_TAA_COMP, VK_SHADER_STAGE_COMPUTE_BIT), .layout = pipeline_layout_general, }, + [TAAU] = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = SHADER_STAGE(QVK_MOD_ASVGF_TAAU_COMP, VK_SHADER_STAGE_COMPUTE_BIT), + .layout = pipeline_layout_general, + }, [COMPOSITING] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE(QVK_MOD_COMPOSITING_COMP, VK_SHADER_STAGE_COMPUTE_BIT), @@ -562,11 +570,27 @@ VkResult vkpt_taa(VkCommandBuffer cmd_buf) BEGIN_PERF_MARKER(cmd_buf, PROFILER_ASVGF_TAA); - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TAA]); + if (cvar_flt_taa->integer == 2) + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TAAU]); + else + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TAA]); + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_taa, 0, LENGTH(desc_sets), desc_sets, 0, 0); - VkExtent2D dispatch_size = qvk.extent_unscaled; + VkExtent2D dispatch_size; + if (cvar_flt_taa->integer == 2) + dispatch_size = qvk.extent_unscaled; + else + { + dispatch_size = qvk.extent_render; + + if (dispatch_size.width < qvk.extent_screen_images.width) + dispatch_size.width += 8; + + if (dispatch_size.height < qvk.extent_screen_images.height) + dispatch_size.height += 8; + } vkCmdDispatch(cmd_buf, (dispatch_size.width + 15) / 16, diff --git a/src/refresh/vkpt/bloom.c b/src/refresh/vkpt/bloom.c index b93a98451..726d96c05 100644 --- a/src/refresh/vkpt/bloom.c +++ 
b/src/refresh/vkpt/bloom.c @@ -48,6 +48,7 @@ cvar_t *cvar_bloom_sigma = NULL; cvar_t *cvar_bloom_intensity = NULL; cvar_t *cvar_bloom_sigma_water = NULL; cvar_t *cvar_bloom_intensity_water = NULL; +extern cvar_t* cvar_flt_taa; static float bloom_intensity; static float bloom_sigma; @@ -55,7 +56,7 @@ static float under_water_animation; static void compute_push_constants() { - float sigma_pixels = bloom_sigma * qvk.extent_unscaled.height; + float sigma_pixels = bloom_sigma * (float)((cvar_flt_taa->integer == 2) ? qvk.extent_unscaled.height : qvk.extent_render.height); float effective_sigma = sigma_pixels * 0.25f; effective_sigma = min(effective_sigma, 100.f); diff --git a/src/refresh/vkpt/draw.c b/src/refresh/vkpt/draw.c index a99f2c54b..aabb5342f 100644 --- a/src/refresh/vkpt/draw.c +++ b/src/refresh/vkpt/draw.c @@ -58,6 +58,8 @@ static VkDescriptorSetLayout desc_set_layout_sbo; static VkDescriptorPool desc_pool_sbo; static VkDescriptorSet desc_set_sbo[MAX_FRAMES_IN_FLIGHT]; +extern cvar_t* cvar_flt_taa; + VkExtent2D vkpt_draw_get_extent() { @@ -562,8 +564,12 @@ vkpt_final_blit_simple(VkCommandBuffer cmd_buf) .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ); + qboolean taau = cvar_flt_taa->integer == 2; + VkOffset3D blit_size = { - .x = qvk.extent_unscaled.width,.y = qvk.extent_unscaled.height,.z = 1 + .x = taau ? qvk.extent_unscaled.width : qvk.extent_render.width, + .y = taau ? 
qvk.extent_unscaled.height : qvk.extent_render.height, + .z = 1 }; VkOffset3D blit_size_unscaled = { .x = qvk.extent_unscaled.width,.y = qvk.extent_unscaled.height,.z = 1 diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 6de9e1cd4..6040ff76c 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -65,6 +65,7 @@ cvar_t *cvar_drs_adjust_down = NULL; cvar_t *cvar_drs_gain = NULL; extern cvar_t *scr_viewsize; extern cvar_t *cvar_bloom_enable; +extern cvar_t* cvar_flt_taa; static int drs_current_scale = 0; static int drs_effective_scale = 0; @@ -2159,7 +2160,7 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->pt_ndf_trim = 1.f; } } - else + else if(cvar_flt_taa->integer == 2) { // adjust texture LOD bias to the resolution scale, i.e. use negative bias if scale is < 100 float resolution_scale = (drs_effective_scale != 0) ? (float)drs_effective_scale : (float)scr_viewsize->integer; @@ -2199,7 +2200,8 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->temporal_blend_factor = ref_mode->temporal_blend_factor; ubo->flt_enable = ref_mode->enable_denoiser; - ubo->flt_taa = ubo->flt_taa && ref_mode->enable_denoiser; + if (!ref_mode->enable_denoiser) + ubo->flt_taa = 0; ubo->pt_num_bounce_rays = ref_mode->num_bounce_rays; ubo->pt_reflect_refract = ref_mode->reflect_refract; @@ -2783,7 +2785,22 @@ R_EndFrame_RTX(void) if (frame_ready) { - vkpt_final_blit_simple(cmd_buf); + if (cvar_flt_taa->integer == 2) + { + vkpt_final_blit_simple(cmd_buf); + } + else + { + VkExtent2D extent_render_double; + extent_render_double.width = qvk.extent_render.width * 2; + extent_render_double.height = qvk.extent_render.height * 2; + + if (extents_equal(qvk.extent_render, qvk.extent_unscaled) || + extents_equal(extent_render_double, qvk.extent_unscaled) && drs_current_scale == 0) // don't do nearest filter 2x upscale with DRS enabled + vkpt_final_blit_simple(cmd_buf); + else + 
vkpt_final_blit_filtered(cmd_buf); + } frame_ready = qfalse; } diff --git a/src/refresh/vkpt/shader/asvgf_taa.comp b/src/refresh/vkpt/shader/asvgf_taa.comp index 9cb58d31a..f1296fba5 100644 --- a/src/refresh/vkpt/shader/asvgf_taa.comp +++ b/src/refresh/vkpt/shader/asvgf_taa.comp @@ -89,28 +89,18 @@ void get_moments(ivec2 ipos, int r, out vec3 mom1, out vec3 mom2) } } -float get_sample_weight(vec2 delta, float scale) -{ - return clamp(1 - scale * dot(delta, delta), 0, 1); -} - void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - if (ipos.x >= global_ubo.unscaled_width || ipos.y >= global_ubo.unscaled_height) + if (ipos.x >= global_ubo.width || ipos.y >= global_ubo.height) { imageStore(IMG_TAA_OUTPUT, ipos, vec4(0)); return; } - // Calculate position in the render buffer (at the lower render resolution) - vec2 nearest_render_pos = vec2(ipos.x + 0.5f, ipos.y + 0.5f) * vec2(global_ubo.width * global_ubo.inv_unscaled_width, global_ubo.height * global_ubo.inv_unscaled_height) - global_ubo.sub_pixel_jitter - vec2(0.5f); - ivec2 int_render_pos = ivec2(round(nearest_render_pos.x), round(nearest_render_pos.y)); - int_render_pos = clamp(int_render_pos, ivec2(0), ivec2(global_ubo.width - 1, global_ubo.height - 1)); - - vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, int_render_pos, 0); + vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, ipos, 0); vec3 color_center = PQEncode(linear_color_center.rgb); int checkerboard_flags = int(linear_color_center.a); @@ -127,7 +117,7 @@ main() int num_pix; // Obtain the color moments for the surrounding pixels. 
- get_moments(int_render_pos, 1, mom1, mom2); + get_moments(ipos, 1, mom1, mom2); num_pix = 9; // Remove or reduce sparkles by clamping the color of the center pixel to its surroundings @@ -152,7 +142,7 @@ main() const int r = 1; for(int yy = -r; yy <= r; yy++) { for(int xx = -r; xx <= r; xx++) { - ivec2 p = int_render_pos + ivec2(xx, yy); + ivec2 p = ipos + ivec2(xx, yy); vec2 m = texelFetch(TEX_FLAT_MOTION, p, 0).xy; float l = dot(m, m); if(l > len) { @@ -163,13 +153,11 @@ main() } } } - - motion *= vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); - vec2 pos_prev = vec2(ipos) + vec2(0.5) + motion.xy; + vec2 pos_prev = ((vec2(ipos) + vec2(0.5)) * vec2(global_ubo.inv_width, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width, global_ubo.prev_height); if(all(greaterThanEqual(ivec2(pos_prev), ivec2(1))) - && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - 1))) + && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.width, global_ubo.height) - 1))) { // Motion vector was valid - sample the previous frame vec3 color_prev = sample_texture_catmull_rom(TEX_ASVGF_TAA_B, pos_prev).rgb; @@ -179,7 +167,7 @@ main() // If enabled, apply neighbourhood color clamping (NCC) if(global_ubo.flt_taa_variance > 0) { - float variance_scale = global_ubo.flt_taa_variance; + float variance_scale = 0.7;// global_ubo.flt_taa_variance; // TAAU switch hack if(checkerboard_flags == (CHECKERBOARD_FLAG_REFLECTION | CHECKERBOARD_FLAG_REFRACTION)) variance_scale *= 2; @@ -192,11 +180,7 @@ main() } // Mix the new color with the clamped previous color - float motion_weight = smoothstep(0, 1.0f, sqrt(dot(motion, motion))); - float sample_weight = get_sample_weight(nearest_render_pos - int_render_pos, global_ubo.unscaled_width * global_ubo.inv_width); - float pixel_weight = max(motion_weight, sample_weight) * 0.1f; - pixel_weight = clamp(pixel_weight, 0, 1); - color_output = mix(color_prev, color_center, pixel_weight); + color_output = 
mix(color_center, color_prev, clamp(global_ubo.flt_taa_history_weight, 0, 0.999)); } } @@ -219,7 +203,7 @@ main() color_output = PQEncode(linear_color_output); } - bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.unscaled_width / 2, global_ubo.unscaled_height / 2))); + bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.width / 2, global_ubo.height / 2))); if(is_readback_pixel) { readback.hdr_color = linear_color_output; diff --git a/src/refresh/vkpt/shader/asvgf_taau.comp b/src/refresh/vkpt/shader/asvgf_taau.comp new file mode 100644 index 000000000..9cb58d31a --- /dev/null +++ b/src/refresh/vkpt/shader/asvgf_taau.comp @@ -0,0 +1,230 @@ +/* +Copyright (C) 2018 Christoph Schied +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +// ========================================================================== // +// A simple temporal anti-aliasing filter that operates in the PQ (Perceptual +// Quantizer) color space, which improves high-contrast edges - for example, +// between some geometry and the sun. 
+// ========================================================================== // + +#version 460 +#extension GL_GOOGLE_include_directive : enable +#extension GL_EXT_nonuniform_qualifier : enable + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +#define GLOBAL_UBO_DESC_SET_IDX 0 +#include "global_ubo.h" + +#define GLOBAL_TEXTURES_DESC_SET_IDX 1 +#include "global_textures.h" + +#define VERTEX_BUFFER_DESC_SET_IDX 2 +#define VERTEX_READONLY 1 +#include "vertex_buffer.h" + +#include "utils.glsl" +#include "asvgf.glsl" + +const float pq_m1 = 0.1593017578125; +const float pq_m2 = 78.84375; +const float pq_c1 = 0.8359375; +const float pq_c2 = 18.8515625; +const float pq_c3 = 18.6875; +const float pq_C = 10000.0; + +vec3 PQDecode(vec3 image) +{ + vec3 Np = pow(max(image, 0.0), vec3(1.0 / pq_m2)); + vec3 L = Np - pq_c1; + L = L / (pq_c2 - pq_c3 * Np); + L = pow(max(L, 0.0), vec3(1.0 / pq_m1)); + + return L * pq_C; // returns cd/m^2 +} + +vec3 PQEncode(vec3 image) +{ + vec3 L = image / pq_C; + vec3 Lm = pow(max(L, 0.0), vec3(pq_m1)); + vec3 N = (pq_c1 + pq_c2 * Lm) / (1.0 + pq_c3 * Lm); + image = pow(N, vec3(pq_m2)); + + return clamp(image, vec3(0), vec3(1)); +} + +void get_moments(ivec2 ipos, int r, out vec3 mom1, out vec3 mom2) +{ + mom1 = vec3(0.0); + mom2 = vec3(0.0); + + for(int yy = -r; yy <= r; yy++) { + for(int xx = -r; xx <= r; xx++) { + if(xx == 0 && yy == 0) + continue; + + ivec2 p = ipos + ivec2(xx, yy); + vec3 c = texelFetch(TEX_FLAT_COLOR, p, 0).rgb; + c = PQEncode(c); + + mom1 += c.rgb; + mom2 += c.rgb * c.rgb; + } + } +} + +float get_sample_weight(vec2 delta, float scale) +{ + return clamp(1 - scale * dot(delta, delta), 0, 1); +} + +void +main() +{ + ivec2 ipos = ivec2(gl_GlobalInvocationID); + + if (ipos.x >= global_ubo.unscaled_width || ipos.y >= global_ubo.unscaled_height) + { + imageStore(IMG_TAA_OUTPUT, ipos, vec4(0)); + return; + } + + // Calculate position in the render buffer (at the lower render resolution) + vec2 
nearest_render_pos = vec2(ipos.x + 0.5f, ipos.y + 0.5f) * vec2(global_ubo.width * global_ubo.inv_unscaled_width, global_ubo.height * global_ubo.inv_unscaled_height) - global_ubo.sub_pixel_jitter - vec2(0.5f); + ivec2 int_render_pos = ivec2(round(nearest_render_pos.x), round(nearest_render_pos.y)); + int_render_pos = clamp(int_render_pos, ivec2(0), ivec2(global_ubo.width - 1, global_ubo.height - 1)); + + vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, int_render_pos, 0); + vec3 color_center = PQEncode(linear_color_center.rgb); + int checkerboard_flags = int(linear_color_center.a); + + vec3 color_output = color_center; + vec3 linear_color_output = linear_color_center.rgb; + + if(global_ubo.flt_taa != 0) + { + // Regular TAA mode + + vec3 mom1; + vec3 mom2; + + int num_pix; + + // Obtain the color moments for the surrounding pixels. + get_moments(int_render_pos, 1, mom1, mom2); + num_pix = 9; + + // Remove or reduce sparkles by clamping the color of the center pixel to its surroundings + if(global_ubo.flt_taa_anti_sparkle > 0) + { + // Custom curve to make perceived blurriness depend on the cvar in a roughly linear way + float scale = pow(min(1.0, global_ubo.flt_taa_anti_sparkle), -0.25); + + color_center = min(color_center, scale * mom1 / (num_pix - 1)); + } + + mom1 += color_center; + mom2 += color_center * color_center; + + mom1 /= float(num_pix); + mom2 /= float(num_pix); + + // Find the longest motion vector in a 3x3 window + vec2 motion; + { + float len = -1; + const int r = 1; + for(int yy = -r; yy <= r; yy++) { + for(int xx = -r; xx <= r; xx++) { + ivec2 p = int_render_pos + ivec2(xx, yy); + vec2 m = texelFetch(TEX_FLAT_MOTION, p, 0).xy; + float l = dot(m, m); + if(l > len) { + len = l; + motion = m; + } + + } + } + } + + motion *= vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); + + vec2 pos_prev = vec2(ipos) + vec2(0.5) + motion.xy; + + if(all(greaterThanEqual(ivec2(pos_prev), ivec2(1))) + && all(lessThan(ivec2(pos_prev), 
ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - 1))) + { + // Motion vector was valid - sample the previous frame + vec3 color_prev = sample_texture_catmull_rom(TEX_ASVGF_TAA_B, pos_prev).rgb; + + if(!any(isnan(color_prev))) + { + // If enabled, apply neighbourhood color clamping (NCC) + if(global_ubo.flt_taa_variance > 0) + { + float variance_scale = global_ubo.flt_taa_variance; + + if(checkerboard_flags == (CHECKERBOARD_FLAG_REFLECTION | CHECKERBOARD_FLAG_REFRACTION)) + variance_scale *= 2; + + vec3 sigma = sqrt(max(vec3(0), mom2 - mom1 * mom1)); + vec3 mi = mom1 - sigma * variance_scale; + vec3 ma = mom1 + sigma * variance_scale; + + color_prev = clamp(color_prev, mi, ma); + } + + // Mix the new color with the clamped previous color + float motion_weight = smoothstep(0, 1.0f, sqrt(dot(motion, motion))); + float sample_weight = get_sample_weight(nearest_render_pos - int_render_pos, global_ubo.unscaled_width * global_ubo.inv_width); + float pixel_weight = max(motion_weight, sample_weight) * 0.1f; + pixel_weight = clamp(pixel_weight, 0, 1); + color_output = mix(color_prev, color_center, pixel_weight); + } + } + + linear_color_output = PQDecode(color_output); + } + else if(global_ubo.temporal_blend_factor > 0) + { + // Temporal accumulation in reference path tracing mode. + // The frame is supposed to be static (paused), so no motion vectors or high quality sampling. + // The accumulator is an RGBA32_FLOAT texture for higher accuracy. 
+ + if(global_ubo.temporal_blend_factor < 1) + { + vec3 prev_color = imageLoad(IMG_HQ_COLOR_INTERLEAVED, ipos).rgb; + linear_color_output = mix(prev_color, linear_color_output, global_ubo.temporal_blend_factor); + } + + imageStore(IMG_HQ_COLOR_INTERLEAVED, ipos, vec4(linear_color_output, 0)); + + color_output = PQEncode(linear_color_output); + } + + bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.unscaled_width / 2, global_ubo.unscaled_height / 2))); + if(is_readback_pixel) + { + readback.hdr_color = linear_color_output; + } + + imageStore(IMG_ASVGF_TAA_A, ipos, vec4(color_output, 0)); + imageStore(IMG_TAA_OUTPUT, ipos, vec4(linear_color_output, 1)); +} diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index 373ae2fbb..72a2b37c2 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -108,9 +108,9 @@ main() vec3 direction; vec2 pixel_offset; - if(global_ubo.temporal_blend_factor > 0) + if(global_ubo.flt_taa == 1 || global_ubo.temporal_blend_factor > 0) { - // Photo mode - use higher quality sampling + // Photo mode or legacy TAA - use higher quality sampling pixel_offset = vec2(get_rng(RNG_PRIMARY_OFF_X), get_rng(RNG_PRIMARY_OFF_Y)); pixel_offset -= vec2(0.5); } diff --git a/src/refresh/vkpt/shader/tone_mapping_apply.comp b/src/refresh/vkpt/shader/tone_mapping_apply.comp index 70e720354..98ac277ba 100644 --- a/src/refresh/vkpt/shader/tone_mapping_apply.comp +++ b/src/refresh/vkpt/shader/tone_mapping_apply.comp @@ -121,7 +121,11 @@ vec3 srgb_dither(vec3 color, ivec2 ipos) void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - if(any(greaterThanEqual(ipos, ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height)))) + const ivec2 screenSize = (global_ubo.flt_taa == 2) + ? 
ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) + : ivec2(global_ubo.width, global_ubo.height); + + if(any(greaterThanEqual(ipos, screenSize))) return; // Get input color and luminance. diff --git a/src/refresh/vkpt/shader/tone_mapping_histogram.comp b/src/refresh/vkpt/shader/tone_mapping_histogram.comp index 28a376936..252d8dc3e 100644 --- a/src/refresh/vkpt/shader/tone_mapping_histogram.comp +++ b/src/refresh/vkpt/shader/tone_mapping_histogram.comp @@ -172,7 +172,11 @@ shared uint s_Histogram[HISTOGRAM_BINS]; void main() { const ivec2 ipos = ivec2(gl_GlobalInvocationID); - if(any(greaterThanEqual(ipos, ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height)))) + const ivec2 screenSize = (global_ubo.flt_taa == 2) + ? ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) + : ivec2(global_ubo.width, global_ubo.height); + + if(any(greaterThanEqual(ipos, screenSize))) return; vec3 input_color = imageLoad(IMG_TAA_OUTPUT, ipos).rgb; @@ -207,8 +211,7 @@ void main() const uint right_bin = left_bin + 1; // Compute pixel importance based on distance to the center of the screen. 
- const vec2 screenSize = vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); - float weight = clamp(1.0 - length(vec2(ipos) / screenSize - vec2(0.5)) * 1.5, 0.01, 1.0); // Spatial weight + float weight = clamp(1.0 - length(vec2(ipos) / vec2(screenSize) - vec2(0.5)) * 1.5, 0.01, 1.0); // Spatial weight const float right_weight_F = fract(histogram_bin) * weight; const float left_weight_F = weight - right_weight_F; diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index ad8355059..2d7255915 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -93,6 +93,7 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_ASVGF_LF_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_TEMPORAL_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_TAA_COMP) \ + SHADER_MODULE_DO(QVK_MOD_ASVGF_TAAU_COMP) \ SHADER_MODULE_DO(QVK_MOD_BLOOM_BLUR_COMP) \ SHADER_MODULE_DO(QVK_MOD_BLOOM_COMPOSITE_COMP) \ SHADER_MODULE_DO(QVK_MOD_TONE_MAPPING_HISTOGRAM_COMP) \ From ac9cdb70aabd1b98d27f9e78222c50468b24b12e Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 06:17:07 -0800 Subject: [PATCH 06/48] Fixed god rays rendering at odd resolutions and disabled the resolution rounding to the nearest 8 pixels. 
--- src/refresh/vkpt/god_rays.c | 6 +++--- src/refresh/vkpt/main.c | 14 ++++++-------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/refresh/vkpt/god_rays.c b/src/refresh/vkpt/god_rays.c index 14bed5528..6536992e7 100644 --- a/src/refresh/vkpt/god_rays.c +++ b/src/refresh/vkpt/god_rays.c @@ -156,9 +156,9 @@ void vkpt_record_god_rays_trace_command_buffer(VkCommandBuffer command_buffer, i vkCmdPushConstants(command_buffer, god_rays.pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(int), &pass); - uint32_t group_size = THREAD_GROUP_SIZE; - uint32_t group_num_x = (qvk.extent_render.width / (2 * qvk.device_count) + (group_size - 1)) / group_size; - uint32_t group_num_y = (qvk.extent_render.height / 2 + (group_size - 1)) / group_size; + uint32_t group_size = THREAD_GROUP_SIZE * 2; + uint32_t group_num_x = (qvk.extent_render.width / qvk.device_count + (group_size - 1)) / group_size; + uint32_t group_num_y = (qvk.extent_render.height + (group_size - 1)) / group_size; vkCmdDispatch(command_buffer, group_num_x, group_num_y, 1); diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 6040ff76c..251ce28b3 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -193,8 +193,7 @@ static VkExtent2D get_render_extent() result.width = (uint32_t)(qvk.extent_unscaled.width * (float)scale / 100.f); result.height = (uint32_t)(qvk.extent_unscaled.height * (float)scale / 100.f); - result.width = (result.width + 7) & ~7; - result.height = (result.height + 7) & ~7; + result.width = (result.width + 1) & ~1; return result; } @@ -214,8 +213,7 @@ static VkExtent2D get_screen_image_extent() result.height = max(qvk.extent_render.height, qvk.extent_unscaled.height); } - result.width = (result.width + 7) & ~7; - result.height = (result.height + 7) & ~7; + result.width = (result.width + 1) & ~1; return result; } @@ -2791,12 +2789,12 @@ R_EndFrame_RTX(void) } else { - VkExtent2D extent_render_double; - extent_render_double.width = 
qvk.extent_render.width * 2; - extent_render_double.height = qvk.extent_render.height * 2; + VkExtent2D extent_unscaled_half; + extent_unscaled_half.width = qvk.extent_unscaled.width / 2; + extent_unscaled_half.height = qvk.extent_unscaled.height / 2; if (extents_equal(qvk.extent_render, qvk.extent_unscaled) || - extents_equal(extent_render_double, qvk.extent_unscaled) && drs_current_scale == 0) // don't do nearest filter 2x upscale with DRS enabled + extents_equal(qvk.extent_render, extent_unscaled_half) && drs_current_scale == 0) // don't do nearest filter 2x upscale with DRS enabled vkpt_final_blit_simple(cmd_buf); else vkpt_final_blit_filtered(cmd_buf); From 244c967aab23172721f816cd59ba4d7f0a941c5b Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 07:43:32 -0800 Subject: [PATCH 07/48] Fixed rendering in regular TAA mode when scaling is over 100%. --- src/refresh/vkpt/bloom.c | 27 ++++++++++--------- src/refresh/vkpt/main.c | 10 ++++++- src/refresh/vkpt/shader/bloom_blur.comp | 4 +-- src/refresh/vkpt/shader/bloom_composite.comp | 4 +-- src/refresh/vkpt/shader/global_textures.h | 16 +++++------ src/refresh/vkpt/shader/global_ubo.h | 5 ++++ .../vkpt/shader/tone_mapping_apply.comp | 2 +- .../vkpt/shader/tone_mapping_histogram.comp | 2 +- src/refresh/vkpt/tone_mapping.c | 11 +++++--- src/refresh/vkpt/vkpt.h | 1 + 10 files changed, 51 insertions(+), 31 deletions(-) diff --git a/src/refresh/vkpt/bloom.c b/src/refresh/vkpt/bloom.c index 726d96c05..2f9bf87f6 100644 --- a/src/refresh/vkpt/bloom.c +++ b/src/refresh/vkpt/bloom.c @@ -301,6 +301,9 @@ vkpt_bloom_destroy_pipelines() VkResult vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) { + qboolean taau = cvar_flt_taa->integer == 2; + VkExtent2D extent = taau ? 
qvk.extent_unscaled : qvk.extent_render; + compute_push_constants(); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_TAA_OUTPUT]); @@ -321,20 +324,20 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) }; VkOffset3D offset_LR_mip_0 = { - .x = qvk.extent_unscaled.width, - .y = qvk.extent_unscaled.height, + .x = extent.width, + .y = extent.height, .z = 1 }; VkOffset3D offset_LR_mip_1 = { - .x = qvk.extent_unscaled.width / 2, - .y = qvk.extent_unscaled.height / 2, + .x = extent.width / 2, + .y = extent.height / 2, .z = 1 }; VkOffset3D offset_LR_mip_2 = { - .x = qvk.extent_unscaled.width / 4, - .y = qvk.extent_unscaled.height / 4, + .x = extent.width / 4, + .y = extent.height / 4, .z = 1 }; @@ -396,8 +399,8 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) vkCmdPushConstants(cmd_buf, pipeline_layout_blur, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_hblur), &push_constants_hblur); vkCmdDispatch(cmd_buf, - (qvk.extent_unscaled.width / 4 + 15) / 16, - (qvk.extent_unscaled.height / 4 + 15) / 16, + (extent.width / 4 + 15) / 16, + (extent.height / 4 + 15) / 16, 1); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_BLOOM_HBLUR]); @@ -406,8 +409,8 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) vkCmdPushConstants(cmd_buf, pipeline_layout_blur, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_vblur), &push_constants_vblur); vkCmdDispatch(cmd_buf, - (qvk.extent_unscaled.width / 4 + 15) / 16, - (qvk.extent_unscaled.height / 4 + 15) / 16, + (extent.width / 4 + 15) / 16, + (extent.height / 4 + 15) / 16, 1); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_BLOOM_VBLUR]); @@ -477,8 +480,8 @@ vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_composite, 0, LENGTH(desc_sets), desc_sets, 0, 0); vkCmdDispatch(cmd_buf, - (qvk.extent_unscaled.width + 15) / 16, - (qvk.extent_unscaled.height + 15) / 16, + (extent.width + 15) / 16, + (extent.height + 15) / 16, 1); } diff --git 
a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 251ce28b3..304f2a142 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -230,6 +230,10 @@ vkpt_initialize_all(VkptInitFlags_t init_flags) qvk.extent_render = get_render_extent(); qvk.extent_screen_images = get_screen_image_extent(); + + qvk.extent_taa.width = max(qvk.extent_screen_images.width, qvk.extent_unscaled.width); + qvk.extent_taa.height = max(qvk.extent_screen_images.height, qvk.extent_unscaled.height); + qvk.gpu_slice_width = (qvk.extent_render.width + qvk.device_count - 1) / qvk.device_count; for(int i = 0; i < LENGTH(vkpt_initialization); i++) { @@ -2110,6 +2114,10 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->unscaled_height = qvk.extent_unscaled.height; ubo->inv_unscaled_width = 1.0f / ubo->unscaled_width; ubo->inv_unscaled_height = 1.0f / ubo->unscaled_height; + ubo->taa_width = qvk.extent_taa.width; + ubo->taa_height = qvk.extent_taa.height; + ubo->inv_taa_width = 1.0f / ubo->taa_width; + ubo->inv_taa_height = 1.0f / ubo->taa_height; ubo->current_gpu_slice_width = qvk.gpu_slice_width; ubo->prev_gpu_slice_width = qvk.gpu_slice_width_prev; ubo->screen_image_width = qvk.extent_screen_images.width; @@ -2708,7 +2716,7 @@ R_BeginFrame_RTX(void) qvk.extent_render = get_render_extent(); qvk.gpu_slice_width = (qvk.extent_render.width + qvk.device_count - 1) / qvk.device_count; - + VkExtent2D extent_screen_images = get_screen_image_extent(); if(!extents_equal(extent_screen_images, qvk.extent_screen_images)) diff --git a/src/refresh/vkpt/shader/bloom_blur.comp b/src/refresh/vkpt/shader/bloom_blur.comp index 3e9d25a85..816112e74 100644 --- a/src/refresh/vkpt/shader/bloom_blur.comp +++ b/src/refresh/vkpt/shader/bloom_blur.comp @@ -39,7 +39,7 @@ layout (push_constant) uniform push_constant_block { vec2 img_to_uv(vec2 ipos) { - vec2 uv = ipos / vec2(global_ubo.unscaled_width / 4, global_ubo.unscaled_height / 4); + vec2 uv = ipos / 
vec2(global_ubo.taa_width / 4, global_ubo.taa_height / 4); return uv; } @@ -72,7 +72,7 @@ void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - ivec2 bloom_extent = ivec2(global_ubo.unscaled_width / 4, global_ubo.unscaled_height / 4); + ivec2 bloom_extent = ivec2(global_ubo.taa_width / 4, global_ubo.taa_height / 4); if(any(greaterThanEqual(ipos, bloom_extent))) { diff --git a/src/refresh/vkpt/shader/bloom_composite.comp b/src/refresh/vkpt/shader/bloom_composite.comp index 1eaab4622..efebfa453 100644 --- a/src/refresh/vkpt/shader/bloom_composite.comp +++ b/src/refresh/vkpt/shader/bloom_composite.comp @@ -30,7 +30,7 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; vec2 img_to_uv(ivec2 ipos) { - vec2 uv = (vec2(ipos) + vec2(0.5, 0.5)) / vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); + vec2 uv = (vec2(ipos) + vec2(0.5, 0.5)) / vec2(global_ubo.taa_width, global_ubo.taa_height); uv = clamp(uv, 0, 1); return uv; } @@ -39,7 +39,7 @@ vec2 img_to_uv(ivec2 ipos) void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - if(any(greaterThanEqual(ipos, ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height)))) + if(any(greaterThanEqual(ipos, ivec2(global_ubo.taa_width, global_ubo.taa_height)))) return; vec2 uv = img_to_uv(ipos); diff --git a/src/refresh/vkpt/shader/global_textures.h b/src/refresh/vkpt/shader/global_textures.h index b0032fbea..0ee956179 100644 --- a/src/refresh/vkpt/shader/global_textures.h +++ b/src/refresh/vkpt/shader/global_textures.h @@ -63,10 +63,10 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(FLAT_COLOR, 26, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(FLAT_MOTION, 27, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(PT_GODRAYS_THROUGHPUT_DIST,28, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(BLOOM_DOWNSCALE_MIP_1, 29, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED / 2, IMG_HEIGHT_UNSCALED / 2 ) \ - IMG_DO(BLOOM_HBLUR, 
30, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED / 4, IMG_HEIGHT_UNSCALED / 4 ) \ - IMG_DO(BLOOM_VBLUR, 31, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED / 4, IMG_HEIGHT_UNSCALED / 4 ) \ - IMG_DO(TAA_OUTPUT, 32, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ + IMG_DO(BLOOM_DOWNSCALE_MIP_1, 29, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 2, qvk.extent_taa.height / 2 ) \ + IMG_DO(BLOOM_HBLUR, 30, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ + IMG_DO(BLOOM_VBLUR, 31, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ + IMG_DO(TAA_OUTPUT, 32, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ IMG_DO(PT_VIEW_DIRECTION, 33, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_VIEW_DIRECTION2, 34, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_THROUGHPUT, 35, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ @@ -89,8 +89,8 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(ASVGF_FILTERED_SPEC_B, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ - IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ + IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ IMG_DO(ASVGF_RNG_SEED_A, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_B, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 57, 
R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ @@ -109,8 +109,8 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(ASVGF_FILTERED_SPEC_A, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ - IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_UNSCALED, IMG_HEIGHT_UNSCALED ) \ + IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ IMG_DO(ASVGF_RNG_SEED_B, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_A, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 0ec327eaf..2d5944876 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -185,6 +185,11 @@ with this program; if not, write to the Free Software Foundation, Inc., GLOBAL_UBO_VAR_LIST_DO(float, prev_adapted_luminance) \ GLOBAL_UBO_VAR_LIST_DO(float, padding1) \ \ + GLOBAL_UBO_VAR_LIST_DO(float, taa_width) \ + GLOBAL_UBO_VAR_LIST_DO(float, taa_height) \ + GLOBAL_UBO_VAR_LIST_DO(float, inv_taa_width) \ + GLOBAL_UBO_VAR_LIST_DO(float, inv_taa_height) \ + \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_center) \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_size) \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_half_size_inv) \ diff --git a/src/refresh/vkpt/shader/tone_mapping_apply.comp b/src/refresh/vkpt/shader/tone_mapping_apply.comp index 98ac277ba..7c86fcbf8 100644 --- 
a/src/refresh/vkpt/shader/tone_mapping_apply.comp +++ b/src/refresh/vkpt/shader/tone_mapping_apply.comp @@ -123,7 +123,7 @@ void main() ivec2 ipos = ivec2(gl_GlobalInvocationID); const ivec2 screenSize = (global_ubo.flt_taa == 2) ? ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - : ivec2(global_ubo.width, global_ubo.height); + : ivec2(global_ubo.taa_width, global_ubo.taa_width); if(any(greaterThanEqual(ipos, screenSize))) return; diff --git a/src/refresh/vkpt/shader/tone_mapping_histogram.comp b/src/refresh/vkpt/shader/tone_mapping_histogram.comp index 252d8dc3e..520211911 100644 --- a/src/refresh/vkpt/shader/tone_mapping_histogram.comp +++ b/src/refresh/vkpt/shader/tone_mapping_histogram.comp @@ -174,7 +174,7 @@ void main() const ivec2 ipos = ivec2(gl_GlobalInvocationID); const ivec2 screenSize = (global_ubo.flt_taa == 2) ? ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - : ivec2(global_ubo.width, global_ubo.height); + : ivec2(global_ubo.taa_width, global_ubo.taa_width); if(any(greaterThanEqual(ipos, screenSize))) return; diff --git a/src/refresh/vkpt/tone_mapping.c b/src/refresh/vkpt/tone_mapping.c index 64440432a..001baf57f 100644 --- a/src/refresh/vkpt/tone_mapping.c +++ b/src/refresh/vkpt/tone_mapping.c @@ -70,6 +70,7 @@ static VkPipelineLayout pipeline_layout_tone_mapping_curve; static VkPipelineLayout pipeline_layout_tone_mapping_apply; static int reset_required = 1; // If 1, recomputes tone curve based only on this frame +extern cvar_t* cvar_flt_taa; // Creates our pipeline layouts. 
VkResult @@ -260,8 +261,8 @@ vkpt_tone_mapping_record_cmd_buffer(VkCommandBuffer cmd_buf, float frame_time) pipeline_layout_tone_mapping_histogram, 0, LENGTH(desc_sets), desc_sets, 0, 0); vkCmdDispatch(cmd_buf, - (qvk.extent_unscaled.width + 15) / 16, - (qvk.extent_unscaled.height + 15) / 16, + (qvk.extent_taa.width + 15) / 16, + (qvk.extent_taa.height + 15) / 16, 1); BUFFER_BARRIER(cmd_buf, @@ -384,9 +385,11 @@ vkpt_tone_mapping_record_cmd_buffer(VkCommandBuffer cmd_buf, float frame_time) vkCmdPushConstants(cmd_buf, pipeline_layout_tone_mapping_apply, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_tm2_apply), push_constants_tm2_apply); + qboolean taau = cvar_flt_taa->integer == 2; + vkCmdDispatch(cmd_buf, - (qvk.extent_unscaled.width + 15) / 16, - (qvk.extent_unscaled.height + 15) / 16, + ((taau ? qvk.extent_unscaled.width : qvk.extent_render.width) + 15) / 16, + ((taau ? qvk.extent_unscaled.height : qvk.extent_render.height) + 15) / 16, 1); // Because VKPT_IMG_TAA_OUTPUT changed, we make sure to wait for the image diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 2d7255915..41123d2e1 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -185,6 +185,7 @@ typedef struct QVK_s { VkExtent2D extent_render; VkExtent2D extent_render_prev; VkExtent2D extent_unscaled; + VkExtent2D extent_taa; uint32_t gpu_slice_width; uint32_t gpu_slice_width_prev; uint32_t num_swap_chain_images; From 33cf326b8b5c73ec7c662338278415b135bf20a8 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 09:15:43 -0800 Subject: [PATCH 08/48] Fixed the tone mapper once again. 
--- src/refresh/vkpt/shader/tone_mapping_histogram.comp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refresh/vkpt/shader/tone_mapping_histogram.comp b/src/refresh/vkpt/shader/tone_mapping_histogram.comp index 520211911..38546de25 100644 --- a/src/refresh/vkpt/shader/tone_mapping_histogram.comp +++ b/src/refresh/vkpt/shader/tone_mapping_histogram.comp @@ -174,7 +174,7 @@ void main() const ivec2 ipos = ivec2(gl_GlobalInvocationID); const ivec2 screenSize = (global_ubo.flt_taa == 2) ? ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - : ivec2(global_ubo.taa_width, global_ubo.taa_width); + : ivec2(global_ubo.width, global_ubo.width); if(any(greaterThanEqual(ipos, screenSize))) return; From 5d741ba8e6455c49a70837e5b7e7836bc31c70cc Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 09:43:12 -0800 Subject: [PATCH 09/48] Improved and re-tuned the temporal filter. --- src/refresh/vkpt/shader/asvgf_temporal.comp | 74 +++++++++++++-------- src/refresh/vkpt/shader/global_ubo.h | 4 +- 2 files changed, 48 insertions(+), 30 deletions(-) diff --git a/src/refresh/vkpt/shader/asvgf_temporal.comp b/src/refresh/vkpt/shader/asvgf_temporal.comp index f42d3624e..8b0ece7c1 100644 --- a/src/refresh/vkpt/shader/asvgf_temporal.comp +++ b/src/refresh/vkpt/shader/asvgf_temporal.comp @@ -53,6 +53,7 @@ shared float s_depth_width[GROUP_SIZE/GRAD_DWN][GROUP_SIZE/GRAD_DWN]; #include "utils.glsl" #include "asvgf.glsl" +#include "brdf.glsl" // Preload the color data into shared memory void @@ -104,7 +105,7 @@ main() ivec2 ipos = ivec2(gl_GlobalInvocationID); vec4 motion = texelFetch(TEX_PT_MOTION, ipos, 0); - + vec2 pos_prev = ((vec2(ipos) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); // Load the parameters of the target pixel @@ -113,14 +114,18 @@ main() float lum_curr_hf; get_shared_data(ivec2(0), depth_curr, normal_curr, lum_curr_hf); + vec2 
metal_rough = texelFetch(TEX_PT_METALLIC, ipos, 0).xy; + float shininess = clamp(2.0 / square(square(metal_rough.y)) - 2.0, 0.0, 32.0); + // Try to get the history sample for all channels, including HF moments - bool temporal_sample_valid = false; + bool temporal_sample_valid_diff = false; + bool temporal_sample_valid_spec = false; SH temporal_color_lf = init_SH(); vec3 temporal_color_hf = vec3(0); vec4 temporal_color_histlen_spec = vec4(0); vec4 temporal_moments_histlen_hf = vec4(0); { - float temporal_sum_w = 0.0; + float temporal_sum_w_diff = 0.0; float temporal_sum_w_spec = 0.0; vec2 pos_ld = floor(pos_prev - vec2(0.5)); @@ -151,7 +156,7 @@ main() float depth_prev = texelFetch(TEX_PT_VIEW_DEPTH_B, p, 0).x; vec3 normal_prev = decode_normal(texelFetch(TEX_PT_NORMAL_B, p, 0).x); - float dist_depth = abs(depth_curr - depth_prev + motion.z) * motion.a; + float dist_depth = abs(depth_curr - depth_prev + motion.z) * motion.a / abs(depth_curr); float dot_normals = dot(normal_curr, normal_prev); if(depth_curr < 0) @@ -161,10 +166,10 @@ main() dist_depth *= 0.25; } - if(dist_depth < 2.0 && dot_normals > 0.5) + if(dist_depth < 0.1) { - float w_diff = w[i]; - float w_spec = w_diff * pow(max(dot_normals, 0), 128); + float w_diff = w[i] * max(dot_normals, 0); + float w_spec = w[i] * pow(max(dot_normals, 0), shininess); SH hist_color_lf = load_SH(TEX_ASVGF_HIST_COLOR_LF_SH_B, TEX_ASVGF_HIST_COLOR_LF_COCG_B, p); accumulate_SH(temporal_color_lf, hist_color_lf, w_diff); @@ -172,22 +177,27 @@ main() temporal_color_hf += unpackRGBE(texelFetch(TEX_ASVGF_HIST_COLOR_HF, p, 0).x) * w_diff; temporal_color_histlen_spec += texelFetch(TEX_ASVGF_FILTERED_SPEC_B, p, 0) * w_spec; temporal_moments_histlen_hf += texelFetch(TEX_ASVGF_HIST_MOMENTS_HF_B, p, 0).rgba * w_diff; - temporal_sum_w += w_diff; + temporal_sum_w_diff += w_diff; temporal_sum_w_spec += w_spec; } } - // We found some relevant surface - good - if(temporal_sum_w_spec > 1e-6) + // We found some relevant surfaces - good + 
if(temporal_sum_w_diff > 1e-6) { - float inv_w_diff = 1.0 / temporal_sum_w; - float inv_w_spec = 1.0 / temporal_sum_w_spec; + float inv_w_diff = 1.0 / temporal_sum_w_diff; temporal_color_lf.shY *= inv_w_diff; temporal_color_lf.CoCg *= inv_w_diff; temporal_color_hf *= inv_w_diff; - temporal_color_histlen_spec *= inv_w_spec; temporal_moments_histlen_hf *= inv_w_diff; - temporal_sample_valid = true; + temporal_sample_valid_diff = true; + } + + if(temporal_sum_w_spec > 1e-6) + { + float inv_w_spec = 1.0 / temporal_sum_w_spec; + temporal_color_histlen_spec *= inv_w_spec; + temporal_sample_valid_spec = true; } } @@ -232,56 +242,64 @@ main() vec4 out_color_histlen_spec; vec4 out_moments_histlen_hf; - if(temporal_sample_valid) - { - // Load the gradients - float grad_lf = texelFetch(TEX_ASVGF_GRAD_LF_PONG, ipos / GRAD_DWN, 0).r; - vec2 grad_hf_spec = texelFetch(TEX_ASVGF_GRAD_HF_SPEC_PONG, ipos / GRAD_DWN, 0).rg; - grad_lf = clamp(grad_lf, 0, 1); - grad_hf_spec = clamp(grad_hf_spec, vec2(0), vec2(1)); + // Load the gradients + float grad_lf = texelFetch(TEX_ASVGF_GRAD_LF_PONG, ipos / GRAD_DWN, 0).r; + vec2 grad_hf_spec = texelFetch(TEX_ASVGF_GRAD_HF_SPEC_PONG, ipos / GRAD_DWN, 0).rg; + grad_lf = clamp(grad_lf, 0, 1); + grad_hf_spec = clamp(grad_hf_spec, vec2(0), vec2(1)); + if(temporal_sample_valid_diff) + { // Compute the antilag factors based on the gradients float antilag_alpha_lf = clamp(mix(1.0, global_ubo.flt_antilag_lf * grad_lf, global_ubo.flt_temporal_lf), 0, 1); float antilag_alpha_hf = clamp(mix(1.0, global_ubo.flt_antilag_hf * grad_hf_spec.x, global_ubo.flt_temporal_hf), 0, 1); - float antilag_alpha_spec = clamp(mix(1.0, global_ubo.flt_antilag_spec * grad_hf_spec.y, global_ubo.flt_temporal_spec), 0, 1); // Adjust the history length, taking the antilag factors into account float hist_len_hf = min(temporal_moments_histlen_hf.b * pow(1.0 - antilag_alpha_hf, 10) + 1.0, 256.0); float hist_len_lf = min(temporal_moments_histlen_hf.a * pow(1.0 - antilag_alpha_lf, 10) + 
1.0, 256.0); - float hist_len_spec = min(temporal_color_histlen_spec.a * pow(1.0 - antilag_alpha_spec, 10) + 1.0, 256.0); // Compute the blending weights based on history length, so that the filter // converges faster. I.e. the first frame has weight of 1.0, the second frame 1/2, third 1/3 and so on. float alpha_color_lf = max(global_ubo.flt_min_alpha_color_lf, 1.0 / hist_len_lf); float alpha_color_hf = max(global_ubo.flt_min_alpha_color_hf, 1.0 / hist_len_hf); - float alpha_color_spec = max(global_ubo.flt_min_alpha_color_spec, 1.0 / hist_len_spec); float alpha_moments_hf = max(global_ubo.flt_min_alpha_moments_hf, 1.0 / hist_len_hf); // Adjust the blending factors, taking the antilag factors into account again alpha_color_lf = mix(alpha_color_lf, 1.0, antilag_alpha_lf); alpha_color_hf = mix(alpha_color_hf, 1.0, antilag_alpha_hf); - alpha_color_spec = mix(alpha_color_spec, 1.0, antilag_alpha_spec); alpha_moments_hf = mix(alpha_moments_hf, 1.0, antilag_alpha_hf); // Blend! out_color_lf = mix_SH(temporal_color_lf, color_curr_lf, alpha_color_lf); out_color_hf.rgb = mix(temporal_color_hf.rgb, color_curr_hf.rgb, alpha_color_hf); - out_color_histlen_spec.rgb = mix(temporal_color_histlen_spec.rgb, color_curr_spec.rgb, alpha_color_spec); out_moments_histlen_hf.rg = mix(temporal_moments_histlen_hf.rg, spatial_moments_hf.rg, alpha_moments_hf); out_moments_histlen_hf.b = hist_len_hf; out_moments_histlen_hf.a = hist_len_lf; - out_color_histlen_spec.a = hist_len_spec; } else { // No valid history - just use the current color and spatial moments out_color_lf = color_curr_lf; out_color_hf.rgb = color_curr_hf; - out_color_histlen_spec = vec4(color_curr_spec, 1); out_moments_histlen_hf = vec4(spatial_moments_hf, 1, 1); } + if(temporal_sample_valid_spec) + { + // Same sequence as above, only for the specular channel + float antilag_alpha_spec = clamp(mix(1.0, global_ubo.flt_antilag_spec * grad_hf_spec.y, global_ubo.flt_temporal_spec), 0, 1); + float hist_len_spec = 
min(temporal_color_histlen_spec.a * pow(1.0 - antilag_alpha_spec, 10) + 1.0, 256.0); + float alpha_color_spec = max(global_ubo.flt_min_alpha_color_spec, 1.0 / hist_len_spec); + alpha_color_spec = mix(alpha_color_spec, 1.0, antilag_alpha_spec); + out_color_histlen_spec.rgb = mix(temporal_color_histlen_spec.rgb, color_curr_spec.rgb, alpha_color_spec); + out_color_histlen_spec.a = hist_len_spec; + } + else + { + out_color_histlen_spec = vec4(color_curr_spec, 1); + } + // Store the outputs for furhter processing by the a-trous HF filter imageStore(IMG_ASVGF_HIST_MOMENTS_HF_A, ipos, out_moments_histlen_hf); STORE_SH(IMG_ASVGF_HIST_COLOR_LF_SH_A, IMG_ASVGF_HIST_COLOR_LF_COCG_A, ipos, out_color_lf); diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 2d5944876..79ee206b5 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -49,9 +49,9 @@ with this program; if not, write to the Free Software Foundation, Inc., UBO_CVAR_DO(flt_enable, 1) /* switch for the entire SVGF reconstruction, 0 or 1 */ \ UBO_CVAR_DO(flt_fixed_albedo, 0) /* if nonzero, replaces surface albedo with that value after filtering */ \ UBO_CVAR_DO(flt_grad_transparent, 0.3) /* gradient scale for reflections and refractions, [0..1] */ \ - UBO_CVAR_DO(flt_min_alpha_color_hf, 0.1) /* minimum weight for the new frame data, color channel, (0..1] */ \ + UBO_CVAR_DO(flt_min_alpha_color_hf, 0.02) /* minimum weight for the new frame data, color channel, (0..1] */ \ UBO_CVAR_DO(flt_min_alpha_color_lf, 0.01) \ - UBO_CVAR_DO(flt_min_alpha_color_spec, 0.1) \ + UBO_CVAR_DO(flt_min_alpha_color_spec, 0.01) \ UBO_CVAR_DO(flt_min_alpha_moments_hf, 0.01) /* minimum weight for the new frame data, moments channel, (0..1] */ \ UBO_CVAR_DO(flt_scale_hf, 1) /* overall per-channel output scale, [0..inf) */ \ UBO_CVAR_DO(flt_scale_lf, 1) \ From d27d30c25b127f0c62b0cd683cc195295282b422 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 
2020 10:44:38 -0800 Subject: [PATCH 10/48] Fixed the excessive gradients in reflections and refractions in TAAU mode. --- src/refresh/vkpt/main.c | 6 ++++++ src/refresh/vkpt/shader/global_ubo.h | 4 ++++ src/refresh/vkpt/shader/primary_rays.rgen | 19 +++++++++++-------- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 304f2a142..811fffab8 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -2233,11 +2233,17 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c int taa_index = (int)(qvk.frame_counter % NUM_TAA_SAMPLES); ubo->sub_pixel_jitter[0] = taa_samples[taa_index][0]; ubo->sub_pixel_jitter[1] = taa_samples[taa_index][1]; + + taa_index = (int)((qvk.frame_counter - 1) % NUM_TAA_SAMPLES); + ubo->prev_sub_pixel_jitter[0] = taa_samples[taa_index][0]; + ubo->prev_sub_pixel_jitter[1] = taa_samples[taa_index][1]; } else { ubo->sub_pixel_jitter[0] = 0.f; ubo->sub_pixel_jitter[1] = 0.f; + ubo->prev_sub_pixel_jitter[0] = 0.f; + ubo->prev_sub_pixel_jitter[1] = 0.f; } ubo->first_person_model = cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON; diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 79ee206b5..73698ee4a 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -182,8 +182,12 @@ with this program; if not, write to the Free Software Foundation, Inc., GLOBAL_UBO_VAR_LIST_DO(float, inv_unscaled_height) \ \ GLOBAL_UBO_VAR_LIST_DO(vec2, sub_pixel_jitter) \ + GLOBAL_UBO_VAR_LIST_DO(vec2, prev_sub_pixel_jitter) \ + \ GLOBAL_UBO_VAR_LIST_DO(float, prev_adapted_luminance) \ GLOBAL_UBO_VAR_LIST_DO(float, padding1) \ + GLOBAL_UBO_VAR_LIST_DO(float, padding2) \ + GLOBAL_UBO_VAR_LIST_DO(float, padding3) \ \ GLOBAL_UBO_VAR_LIST_DO(float, taa_width) \ GLOBAL_UBO_VAR_LIST_DO(float, taa_height) \ diff --git a/src/refresh/vkpt/shader/primary_rays.rgen 
b/src/refresh/vkpt/shader/primary_rays.rgen index 72a2b37c2..85965293d 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -103,6 +103,8 @@ main() bool is_odd_checkerboard = (gl_LaunchIDNV.z != 0) || (push_constants.gpu_index == 1); + bool is_gradient = get_is_gradient(ipos); + rng_seed = texelFetch(TEX_ASVGF_RNG_SEED_A, ipos, 0).r; vec3 position; vec3 direction; @@ -117,15 +119,13 @@ main() else { // Real-time mode - use predictable sampling for TAAU - pixel_offset = global_ubo.sub_pixel_jitter; + pixel_offset = is_gradient ? global_ubo.prev_sub_pixel_jitter : global_ubo.sub_pixel_jitter; } const ivec2 image_position = get_image_position(); const vec2 pixel_center = vec2(image_position) + vec2(0.5); const vec2 inUV = (pixel_center + pixel_offset) / vec2(get_image_size()); - bool is_gradient = get_is_gradient(ipos); - Ray ray = get_primary_ray(inUV); if(is_gradient) @@ -253,19 +253,22 @@ main() vec3 geo_normal = normalize(triangle.normals * bary); /* compute view-space derivatives of depth and motion vectors */ + Ray ray_0 = get_primary_ray(inUV); Ray ray_x = get_primary_ray(inUV + vec2(1.0 / float(global_ubo.width), 0)); Ray ray_y = get_primary_ray(inUV + vec2(0, 1.0 / float(global_ubo.height))); + vec3 bary_0 = compute_barycentric(triangle.positions, ray_0.origin, ray_0.direction); vec3 bary_x = compute_barycentric(triangle.positions, ray_x.origin, ray_x.direction); vec3 bary_y = compute_barycentric(triangle.positions, ray_y.origin, ray_y.direction); - vec3 pos_ws_x= triangle.positions * bary_x; - vec3 pos_ws_y= triangle.positions * bary_y; + vec3 pos_ws_x = triangle.positions * bary_x; + vec3 pos_ws_y = triangle.positions * bary_y; + vec2 tex_coord_0 = triangle.tex_coords * bary_0; vec2 tex_coord_x = triangle.tex_coords * bary_x; vec2 tex_coord_y = triangle.tex_coords * bary_y; - tex_coord_x -= tex_coord; - tex_coord_y -= tex_coord; + tex_coord_x -= tex_coord_0; + tex_coord_y -= tex_coord_0; 
if(global_ubo.pt_texture_lod_bias != 0) { tex_coord_x *= pow(2.0, global_ubo.pt_texture_lod_bias); @@ -292,7 +295,7 @@ main() imageStore(IMG_PT_MOTION, ipos, vec4(motion, fwidth_depth)); // Compute angle between adjacent rays using approximate acos(dot(...)), assume horizontal angle == vertical angle - float footprint_size_over_distance = sqrt(max(0, 2.0 - 2.0 * dot(ray_x.direction, ray.direction))); + float footprint_size_over_distance = sqrt(max(0, 2.0 - 2.0 * dot(ray_x.direction, ray_0.direction))); if(is_gradient) { From 9010ea9b0ec0ef7655ed905fbf2f327883a71a03 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 10:46:41 -0800 Subject: [PATCH 11/48] Set version to 1.3.99 to indicate this is a prerel 1.4. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4d968a6f..a55011f9b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,7 +5,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") project(quake2-RTX) set(Q2RTX_VERSION_MAJOR 1) set(Q2RTX_VERSION_MINOR 3) -set(Q2RTX_VERSION_POINT 0) +set(Q2RTX_VERSION_POINT 99) # get short-hash execute_process( From ed4bc5aed7ff970f0a2a121b688b88994874148b Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 10:50:38 -0800 Subject: [PATCH 12/48] Fixed the CFLAGS for MinSizeRel and RelWithDebInfo builds. 
--- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a55011f9b..7745e8ea2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,9 +41,9 @@ endif () IF(WIN32) set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /MTd") - set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_RELEASE} /MT") + set(CMAKE_C_FLAGS_MINSIZEREL "${CMAKE_C_FLAGS_MINSIZEREL} /MT") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /MT") - set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELEASE} /MT") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "${CMAKE_C_FLAGS_RELWITHDEBINFO} /MT") ENDIF() add_subdirectory(extern) From 65e265300c8e1a8b04eb79fc15a448bed9e6f16b Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 10:57:13 -0800 Subject: [PATCH 13/48] Fixed the nearest filter when DRS was enabled and then disabled. --- src/refresh/vkpt/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 811fffab8..358688a16 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -2808,7 +2808,7 @@ R_EndFrame_RTX(void) extent_unscaled_half.height = qvk.extent_unscaled.height / 2; if (extents_equal(qvk.extent_render, qvk.extent_unscaled) || - extents_equal(qvk.extent_render, extent_unscaled_half) && drs_current_scale == 0) // don't do nearest filter 2x upscale with DRS enabled + extents_equal(qvk.extent_render, extent_unscaled_half) && drs_effective_scale == 0) // don't do nearest filter 2x upscale with DRS enabled vkpt_final_blit_simple(cmd_buf); else vkpt_final_blit_filtered(cmd_buf); From 2ea10693ed2b26338b695dbd5a87287585037a10 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 10 Nov 2020 10:59:22 -0800 Subject: [PATCH 14/48] Clamp viewsize at 25% minimum. 
--- src/refresh/vkpt/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 358688a16..1f14d9cb6 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -155,7 +155,7 @@ static void recreate_swapchain(); static void viewsize_changed(cvar_t *self) { - Cvar_ClampInteger(scr_viewsize, 50, 200); + Cvar_ClampInteger(scr_viewsize, 25, 200); Com_Printf("Resolution scale: %d%%\n", scr_viewsize->integer); } From 6b392486028946ced235f2cda2a89d6c8c1621fc Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 18 Nov 2020 10:11:55 -0800 Subject: [PATCH 15/48] Improved the temporal filter to avoid smearing on surfaces that appear at small glancing angles, e.g. floor when going up the stairs. --- src/refresh/vkpt/shader/asvgf_atrous.comp | 4 +- src/refresh/vkpt/shader/asvgf_lf.comp | 4 +- src/refresh/vkpt/shader/asvgf_temporal.comp | 8 ++- src/refresh/vkpt/shader/direct_lighting.rgen | 2 +- src/refresh/vkpt/shader/global_textures.h | 69 ++++++++++--------- .../vkpt/shader/indirect_lighting.rgen | 2 +- src/refresh/vkpt/shader/primary_rays.rgen | 4 +- src/refresh/vkpt/shader/reflect_refract.rgen | 6 +- 8 files changed, 53 insertions(+), 46 deletions(-) diff --git a/src/refresh/vkpt/shader/asvgf_atrous.comp b/src/refresh/vkpt/shader/asvgf_atrous.comp index a24d7c923..4cf478838 100644 --- a/src/refresh/vkpt/shader/asvgf_atrous.comp +++ b/src/refresh/vkpt/shader/asvgf_atrous.comp @@ -210,7 +210,7 @@ SH interpolate_lf(sampler2D img_lf_shY, sampler2D img_lf_CoCg, ivec2 ipos) // Target pixel parameters float depth_center = texelFetch(TEX_PT_VIEW_DEPTH_A, ipos, 0).x; float fwidth_depth = texelFetch(TEX_PT_MOTION, ipos, 0).w; - vec3 geo_normal_center = decode_normal(texelFetch(TEX_PT_GEO_NORMAL, ipos, 0).x); + vec3 geo_normal_center = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos, 0).x); vec2 pos_lowres = (vec2(ipos) + vec2(0.5)) / GRAD_DWN - vec2(0.5); @@ -235,7 +235,7 @@ SH 
interpolate_lf(sampler2D img_lf_shY, sampler2D img_lf_CoCg, ivec2 ipos) // Low-res pixel parameters float p_depth = texelFetch(TEX_PT_VIEW_DEPTH_A, p_hires, 0).x; - vec3 p_geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL, p_hires, 0).x); + vec3 p_geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, p_hires, 0).x); // Start with bilinear weight float p_w = w[i]; diff --git a/src/refresh/vkpt/shader/asvgf_lf.comp b/src/refresh/vkpt/shader/asvgf_lf.comp index bee641e38..7e604522d 100644 --- a/src/refresh/vkpt/shader/asvgf_lf.comp +++ b/src/refresh/vkpt/shader/asvgf_lf.comp @@ -69,7 +69,7 @@ filter_image( } // Load the parameters of the anchor pixel - vec3 geo_normal_center = decode_normal(texelFetch(TEX_PT_GEO_NORMAL, ipos_hires, 0).x); + vec3 geo_normal_center = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos_hires, 0).x); float depth_center = texelFetch(TEX_PT_VIEW_DEPTH_A, ipos_hires, 0).x; float fwidth_depth = texelFetch(TEX_PT_MOTION, ipos_hires, 0).w; @@ -103,7 +103,7 @@ filter_image( // Use geometric normals here so that we can blur over larger areas. // The lighting detail will be partially preserved by spherical harmonics. 
- vec3 geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL, p_hires, 0).x); + vec3 geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, p_hires, 0).x); float depth = texelFetch(TEX_PT_VIEW_DEPTH_A, p_hires, 0).x; diff --git a/src/refresh/vkpt/shader/asvgf_temporal.comp b/src/refresh/vkpt/shader/asvgf_temporal.comp index 8b0ece7c1..aef2275c3 100644 --- a/src/refresh/vkpt/shader/asvgf_temporal.comp +++ b/src/refresh/vkpt/shader/asvgf_temporal.comp @@ -117,6 +117,8 @@ main() vec2 metal_rough = texelFetch(TEX_PT_METALLIC, ipos, 0).xy; float shininess = clamp(2.0 / square(square(metal_rough.y)) - 2.0, 0.0, 32.0); + vec3 geo_normal_curr = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos, 0).x); + // Try to get the history sample for all channels, including HF moments bool temporal_sample_valid_diff = false; bool temporal_sample_valid_spec = false; @@ -155,9 +157,11 @@ main() float depth_prev = texelFetch(TEX_PT_VIEW_DEPTH_B, p, 0).x; vec3 normal_prev = decode_normal(texelFetch(TEX_PT_NORMAL_B, p, 0).x); + vec3 geo_normal_prev = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_B, p, 0).x); - float dist_depth = abs(depth_curr - depth_prev + motion.z) * motion.a / abs(depth_curr); + float dist_depth = abs(depth_curr - depth_prev + motion.z) / abs(depth_curr); float dot_normals = dot(normal_curr, normal_prev); + float dot_geo_normals = dot(geo_normal_curr, geo_normal_prev); if(depth_curr < 0) { @@ -166,7 +170,7 @@ main() dist_depth *= 0.25; } - if(dist_depth < 0.1) + if(dist_depth < 0.1 && dot_geo_normals > 0.5) { float w_diff = w[i] * max(dot_normals, 0); float w_spec = w[i] * pow(max(dot_normals, 0), shininess); diff --git a/src/refresh/vkpt/shader/direct_lighting.rgen b/src/refresh/vkpt/shader/direct_lighting.rgen index 15178cedf..c0ac423e1 100644 --- a/src/refresh/vkpt/shader/direct_lighting.rgen +++ b/src/refresh/vkpt/shader/direct_lighting.rgen @@ -55,7 +55,7 @@ direct_lighting(ivec2 ipos, bool is_odd_checkerboard, out vec3 high_freq, out ve vec4 view_direction = 
texelFetch(TEX_PT_VIEW_DIRECTION, ipos, 0); vec3 normal = decode_normal(texelFetch(TEX_PT_NORMAL_A, ipos, 0).x); - vec3 geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL, ipos, 0).x); + vec3 geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos, 0).x); float primary_roughness = texelFetch(TEX_PT_METALLIC, ipos, 0).y; float primary_specular = texelFetch(TEX_PT_ALBEDO, ipos, 0).a; uint cluster_idx = texelFetch(TEX_PT_CLUSTER, ipos, 0).x; diff --git a/src/refresh/vkpt/shader/global_textures.h b/src/refresh/vkpt/shader/global_textures.h index 0ee956179..ae24ab402 100644 --- a/src/refresh/vkpt/shader/global_textures.h +++ b/src/refresh/vkpt/shader/global_textures.h @@ -76,51 +76,54 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(PT_COLOR_LF_COCG, 39, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_COLOR_HF, 40, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_COLOR_SPEC, 41, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL, 42, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL2, 43, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_CLUSTER, 44, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL2, 42, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER, 43, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ #define LIST_IMAGES_A_B \ - IMG_DO(PT_VIEW_DEPTH_A, 45, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_B, 46, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_A, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_B, 48, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_A, 49, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_B, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - 
IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_RNG_SEED_A, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_RNG_SEED_B, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 58, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,59, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,60, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_A, 61, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_B, 62, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - -#define LIST_IMAGES_B_A \ + IMG_DO(PT_VIEW_DEPTH_A, 44, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(PT_VIEW_DEPTH_B, 45, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_A, 46, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_A, 46, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_NORMAL_B, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_A, 48, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_B, 49, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_A, 48, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_B, 49, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_FILTERED_SPEC_A, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_B, 51, R16G16B16A16_SFLOAT, 
rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_RNG_SEED_B, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_B, 55, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ IMG_DO(ASVGF_RNG_SEED_A, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_RNG_SEED_B, 57, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 58, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,59, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 59, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,60, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_B, 61, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,61, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_GRAD_SMPL_POS_A, 62, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_B, 63, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + +#define LIST_IMAGES_B_A \ + IMG_DO(PT_VIEW_DEPTH_B, 44, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_A, 45, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_B, 46, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_A, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_B, 48, R32_UINT, r32ui, 
IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_A, 49, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_B, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_TAA_A, 55, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_RNG_SEED_B, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_RNG_SEED_A, 57, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 58, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 59, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,60, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,61, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_B, 62, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_A, 63, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ -#define NUM_IMAGES 63 /* this really sucks but I don't know how to fix it +#define NUM_IMAGES 64 /* this really sucks but I don't know how to fix it counting with enum does not work in GLSL */ // todo: make naming consistent! 
diff --git a/src/refresh/vkpt/shader/indirect_lighting.rgen b/src/refresh/vkpt/shader/indirect_lighting.rgen index eae42d72d..1b295c079 100644 --- a/src/refresh/vkpt/shader/indirect_lighting.rgen +++ b/src/refresh/vkpt/shader/indirect_lighting.rgen @@ -78,7 +78,7 @@ indirect_lighting( view_direction = texelFetch(TEX_PT_VIEW_DIRECTION, ipos, 0); normal = decode_normal(texelFetch(TEX_PT_NORMAL_A, ipos, 0).x); - geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL, ipos, 0).x); + geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos, 0).x); if(!half_res) { diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index 85965293d..b459f816a 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -183,7 +183,7 @@ main() // Store an empty surface into the G-buffer imageStore(IMG_PT_NORMAL_A, ipos, uvec4(0)); - imageStore(IMG_PT_GEO_NORMAL, ipos, uvec4(0)); + imageStore(IMG_PT_GEO_NORMAL_A, ipos, uvec4(0)); imageStore(IMG_PT_VIEW_DEPTH_A, ipos, vec4(PRIMARY_RAY_T_MAX)); imageStore(IMG_PT_GODRAYS_THROUGHPUT_DIST, ipos, vec4(1, 1, 1, PRIMARY_RAY_T_MAX)); imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, 0)); @@ -434,7 +434,7 @@ main() // Replace the material light style with medium material_id = (material_id & ~MATERIAL_LIGHT_STYLE_MASK) | (global_ubo.medium << MATERIAL_LIGHT_STYLE_SHIFT) & MATERIAL_LIGHT_STYLE_MASK; - imageStore(IMG_PT_GEO_NORMAL, ipos, uvec4(encode_normal(geo_normal))); + imageStore(IMG_PT_GEO_NORMAL_A, ipos, uvec4(encode_normal(geo_normal))); imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(position.xyz, uintBitsToFloat(material_id))); imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, float(checkerboard_flags))); imageStore(IMG_PT_THROUGHPUT, ipos, vec4(throughput, distance_curr)); diff --git a/src/refresh/vkpt/shader/reflect_refract.rgen b/src/refresh/vkpt/shader/reflect_refract.rgen index 9fe0d8484..29b82d72e 100644 --- 
a/src/refresh/vkpt/shader/reflect_refract.rgen +++ b/src/refresh/vkpt/shader/reflect_refract.rgen @@ -101,7 +101,7 @@ main() float optical_path_length = throughput_distance.a; vec4 transparent = imageLoad(IMG_PT_TRANSPARENT, ipos); vec3 primary_albedo = imageLoad(IMG_PT_ALBEDO, ipos).rgb; - vec3 geo_normal = decode_normal(imageLoad(IMG_PT_GEO_NORMAL, ipos).x); + vec3 geo_normal = decode_normal(imageLoad(IMG_PT_GEO_NORMAL_A, ipos).x); vec3 normal = decode_normal(imageLoad(IMG_PT_NORMAL_A, ipos).x); uint cluster_idx = imageLoad(IMG_PT_CLUSTER, ipos).r; @@ -496,7 +496,7 @@ main() material_id = (primary_medium << MATERIAL_LIGHT_STYLE_SHIFT) & MATERIAL_LIGHT_STYLE_MASK; imageStore(IMG_PT_NORMAL_A, ipos, uvec4(0)); - imageStore(IMG_PT_GEO_NORMAL, ipos, uvec4(0)); + imageStore(IMG_PT_GEO_NORMAL_A, ipos, uvec4(0)); imageStore(IMG_PT_ALBEDO, ipos, vec4(0)); imageStore(IMG_PT_METALLIC, ipos, vec4(0)); imageStore(IMG_PT_TRANSPARENT, ipos, transparent); @@ -706,7 +706,7 @@ main() // Replace the material light style with medium material_id = (material_id & ~MATERIAL_LIGHT_STYLE_MASK) | (primary_medium << MATERIAL_LIGHT_STYLE_SHIFT) & MATERIAL_LIGHT_STYLE_MASK; - imageStore(IMG_PT_GEO_NORMAL, ipos, uvec4(encode_normal(geo_normal))); + imageStore(IMG_PT_GEO_NORMAL_A, ipos, uvec4(encode_normal(geo_normal))); imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(position.xyz, uintBitsToFloat(material_id))); imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, float(checkerboard_flags))); imageStore(IMG_PT_THROUGHPUT, ipos, vec4(throughput, optical_path_length)); From 552f1d27886cc4639a2677aa017125be5c71131f Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 18 Nov 2020 15:21:57 -0800 Subject: [PATCH 16/48] Implemented sampling across checkerboard fields in the temporal filter to reduce blurring. 
--- src/refresh/vkpt/shader/asvgf_temporal.comp | 57 +++++++++++++++++++-- 1 file changed, 53 insertions(+), 4 deletions(-) diff --git a/src/refresh/vkpt/shader/asvgf_temporal.comp b/src/refresh/vkpt/shader/asvgf_temporal.comp index aef2275c3..fd3ff6d19 100644 --- a/src/refresh/vkpt/shader/asvgf_temporal.comp +++ b/src/refresh/vkpt/shader/asvgf_temporal.comp @@ -97,6 +97,30 @@ get_shared_data(ivec2 offset, out float depth, out vec3 normal, out float lum_hf lum_hf = unpackHalf4x16(normal_lum).w; } +// Convert a checkerboarded pixel position (left and right fields) to flat-screen position +ivec2 checker_to_flat(ivec2 pos, int width) +{ + uint half_width = width / 2; + bool is_even_checkerboard = pos.x < half_width; + + return ivec2( + is_even_checkerboard + ? (pos.x * 2) + (pos.y & 1) + : ((pos.x - half_width) * 2) + ((pos.y & 1) ^ 1), + pos.y); +} + +// Convert a flat-screen (regular) pixel position to checkerboarded (left and right fields) +ivec2 flat_to_checker(ivec2 pos, int width) +{ + uint half_width = width / 2; + bool is_even_checkerboard = (pos.x & 1) == (pos.y & 1); + + return ivec2( + (pos.x / 2) + (is_even_checkerboard ? 
0 : half_width), + pos.y); +} + void main() { @@ -105,8 +129,26 @@ main() ivec2 ipos = ivec2(gl_GlobalInvocationID); vec4 motion = texelFetch(TEX_PT_MOTION, ipos, 0); - - vec2 pos_prev = ((vec2(ipos) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); + + // Find out if this pixel belongs to a checkerboard-split-path surface + int checkerboard_flags = int(texelFetch(TEX_PT_VIEW_DIRECTION, ipos, 0).w); + bool is_checkerboarded_surface = bitCount(checkerboard_flags) > 1; + + // If it's a regular (non-split) surface and we're running on a single GPU, + // we can access both checkerboard fields to get higher sampling quality + bool sample_across_fields = !is_checkerboarded_surface && (global_ubo.current_gpu_slice_width == global_ubo.width); + + vec2 pos_prev; + if (sample_across_fields) + { + // Reprojection in flat-screen coordinates + pos_prev = ((vec2(checker_to_flat(ipos, global_ubo.width)) + vec2(0.5)) * vec2(global_ubo.inv_width, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width, global_ubo.prev_height); + } + else + { + // Reprojection in checkerboarded coordinates + pos_prev = ((vec2(ipos) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); + } // Load the parameters of the target pixel float depth_curr; @@ -134,8 +176,8 @@ main() vec2 subpix = fract(pos_prev - vec2(0.5) - pos_ld); int field_left = 0; - int field_right = global_ubo.prev_width / 2; - if(ipos.x >= global_ubo.width / 2) + int field_right = sample_across_fields ? 
global_ubo.prev_width : (global_ubo.prev_width / 2); + if (!sample_across_fields && ipos.x >= global_ubo.width / 2) { field_left = field_right; field_right = global_ubo.prev_width; @@ -155,6 +197,13 @@ main() if(p.x < field_left || p.x >= field_right || p.y >= global_ubo.prev_height) continue; + if (sample_across_fields) + { + // If we're sampling both checker fields, `p` is calculated in flat coordinates, + // so translate it back into checkerboarded coordinates to load the G-buffer data + p = flat_to_checker(p, global_ubo.prev_width); + } + float depth_prev = texelFetch(TEX_PT_VIEW_DEPTH_B, p, 0).x; vec3 normal_prev = decode_normal(texelFetch(TEX_PT_NORMAL_B, p, 0).x); vec3 geo_normal_prev = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_B, p, 0).x); From 32f08422eb3aba2a423f434251490bdaef206881 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 19 Nov 2020 06:17:48 -0800 Subject: [PATCH 17/48] Replaced forward projection of gradient pixels before primary rays with backward projection after primary and reflections. This approach is more robust and it works correctly with PSR. 
--- src/CMakeLists.txt | 2 +- src/refresh/vkpt/asvgf.c | 49 ++-- src/refresh/vkpt/main.c | 13 +- src/refresh/vkpt/path_tracer.c | 11 +- src/refresh/vkpt/profiler.c | 4 +- src/refresh/vkpt/shader/asvgf_atrous.comp | 6 +- .../vkpt/shader/asvgf_fwd_project.comp | 260 ------------------ .../vkpt/shader/asvgf_gradient_reproject.comp | 198 +++++++++++++ src/refresh/vkpt/shader/asvgf_temporal.comp | 2 +- src/refresh/vkpt/shader/compositing.comp | 2 +- src/refresh/vkpt/shader/direct_lighting.rgen | 7 +- src/refresh/vkpt/shader/global_textures.h | 161 +++++------ src/refresh/vkpt/shader/global_ubo.h | 2 +- .../vkpt/shader/indirect_lighting.rgen | 5 +- src/refresh/vkpt/shader/primary_rays.rgen | 82 ++---- src/refresh/vkpt/shader/reflect_refract.rgen | 19 +- src/refresh/vkpt/vkpt.h | 9 +- 17 files changed, 377 insertions(+), 455 deletions(-) delete mode 100644 src/refresh/vkpt/shader/asvgf_fwd_project.comp create mode 100644 src/refresh/vkpt/shader/asvgf_gradient_reproject.comp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 118a37d4f..f78c1f9f9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -309,9 +309,9 @@ set(SRC_SHADERS refresh/vkpt/shader/path_tracer_explosion.rahit refresh/vkpt/shader/path_tracer_shadow.rmiss refresh/vkpt/shader/asvgf_atrous.comp - refresh/vkpt/shader/asvgf_fwd_project.comp refresh/vkpt/shader/asvgf_gradient_atrous.comp refresh/vkpt/shader/asvgf_gradient_img.comp + refresh/vkpt/shader/asvgf_gradient_reproject.comp refresh/vkpt/shader/asvgf_lf.comp refresh/vkpt/shader/asvgf_seed_rng.comp refresh/vkpt/shader/asvgf_taa.comp diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index 1b6a162d4..147672b78 100644 --- a/src/refresh/vkpt/asvgf.c +++ b/src/refresh/vkpt/asvgf.c @@ -21,9 +21,9 @@ with this program; if not, write to the Free Software Foundation, Inc., enum { SEED_RNG, - FWD_PROJECT, GRADIENT_IMAGE, GRADIENT_ATROUS, + GRADIENT_REPROJECT, TEMPORAL, ATROUS_LF, ATROUS_ITER_0, @@ -117,11 +117,6 @@ 
vkpt_asvgf_create_pipelines() .stage = SHADER_STAGE(QVK_MOD_ASVGF_SEED_RNG_COMP, VK_SHADER_STAGE_COMPUTE_BIT), .layout = pipeline_layout_atrous, }, - [FWD_PROJECT] = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = SHADER_STAGE(QVK_MOD_ASVGF_FWD_PROJECT_COMP, VK_SHADER_STAGE_COMPUTE_BIT), - .layout = pipeline_layout_general, - }, [GRADIENT_IMAGE] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE(QVK_MOD_ASVGF_GRADIENT_IMG_COMP, VK_SHADER_STAGE_COMPUTE_BIT), @@ -132,6 +127,11 @@ vkpt_asvgf_create_pipelines() .stage = SHADER_STAGE(QVK_MOD_ASVGF_GRADIENT_ATROUS_COMP, VK_SHADER_STAGE_COMPUTE_BIT), .layout = pipeline_layout_atrous, }, + [GRADIENT_REPROJECT] = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .stage = SHADER_STAGE(QVK_MOD_ASVGF_GRADIENT_REPROJECT_COMP, VK_SHADER_STAGE_COMPUTE_BIT), + .layout = pipeline_layout_general, + }, [TEMPORAL] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE(QVK_MOD_ASVGF_TEMPORAL_COMP, VK_SHADER_STAGE_COMPUTE_BIT), @@ -256,7 +256,7 @@ vkpt_asvgf_destroy_pipelines() VkResult -vkpt_asvgf_create_gradient_samples(VkCommandBuffer cmd_buf, uint32_t frame_num, int do_gradient_samples) +vkpt_asvgf_seed_rng(VkCommandBuffer cmd_buf) { VkDescriptorSet desc_sets[] = { qvk.desc_set_ubo, @@ -308,23 +308,30 @@ vkpt_asvgf_create_gradient_samples(VkCommandBuffer cmd_buf, uint32_t frame_num, BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + (qvk.frame_counter & 1)]); - if (do_gradient_samples) - { - BEGIN_PERF_MARKER(cmd_buf, PROFILER_ASVGF_DO_GRADIENT_SAMPLES); + return VK_SUCCESS; +} - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[FWD_PROJECT]); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_layout_general, 0, LENGTH(desc_sets), desc_sets, 0, 0); - vkCmdDispatch(cmd_buf, - (qvk.gpu_slice_width_prev / GRAD_DWN + 15) / 16, - (qvk.extent_render_prev.height / GRAD_DWN + 15) / 
16, - 1); +VkResult +vkpt_asvgf_gradient_reproject(VkCommandBuffer cmd_buf) +{ + VkDescriptorSet desc_sets[] = { + qvk.desc_set_ubo, + qvk_get_current_desc_set_textures(), + qvk.desc_set_vertex_buffer + }; + + int current_sample_pos_image = VKPT_IMG_ASVGF_GRAD_SMPL_POS_A + (qvk.frame_counter & 1); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + (qvk.frame_counter & 1)]); - BARRIER_COMPUTE(cmd_buf, qvk.images[current_sample_pos_image]); + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[GRADIENT_REPROJECT]); + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout_general, 0, LENGTH(desc_sets), desc_sets, 0, 0); + vkCmdDispatch(cmd_buf, + (qvk.gpu_slice_width_prev / GRAD_DWN + 15) / 16, + (qvk.extent_render_prev.height / GRAD_DWN + 15) / 16, + 1); - END_PERF_MARKER(cmd_buf, PROFILER_ASVGF_DO_GRADIENT_SAMPLES); - } + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + (qvk.frame_counter & 1)]); + BARRIER_COMPUTE(cmd_buf, qvk.images[current_sample_pos_image]); return VK_SUCCESS; } diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 1f14d9cb6..76e7dac8c 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -2436,9 +2436,9 @@ R_RenderFrame_RTX(refdef_t *fd) vkpt_instance_geometry(trace_cmd_buf, upload_info.num_instances, update_world_animations); END_PERF_MARKER(trace_cmd_buf, PROFILER_INSTANCE_GEOMETRY); - BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_GRADIENT_SAMPLES); - vkpt_asvgf_create_gradient_samples(trace_cmd_buf, qvk.frame_counter, ref_mode.enable_denoiser); - END_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_GRADIENT_SAMPLES); + BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_SEED_RNG); + vkpt_asvgf_seed_rng(trace_cmd_buf); + END_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_SEED_RNG); BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_BVH_UPDATE); assert(upload_info.num_vertices % 3 == 0); @@ -2511,6 +2511,13 @@ R_RenderFrame_RTX(refdef_t *fd) 
END_PERF_MARKER(trace_cmd_buf, PROFILER_REFLECT_REFRACT_2); } + if (ref_mode.enable_denoiser) + { + BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_GRADIENT_REPROJECT); + vkpt_asvgf_gradient_reproject(trace_cmd_buf); + END_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_GRADIENT_REPROJECT); + } + vkpt_pt_trace_lighting(trace_cmd_buf, ref_mode.num_bounce_rays); vkpt_submit_command_buffer( diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index 3cf2b06d1..01db89f35 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -1042,9 +1042,8 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) END_PERF_MARKER(cmd_buf, PROFILER_PRIMARY_RAYS); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VISBUF]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VISBUF_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_TRANSPARENT]); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_TEX_GRADIENTS]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_MOTION]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_SHADING_POSITION]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VIEW_DIRECTION]); @@ -1052,8 +1051,8 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_BOUNCE_THROUGHPUT]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_GODRAYS_THROUGHPUT_DIST]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_ALBEDO]); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_METALLIC]); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_CLUSTER]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_METALLIC_A + frame_idx]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_CLUSTER_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VIEW_DEPTH_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_NORMAL_A + frame_idx]); @@ -1094,8 +1093,8 @@ vkpt_pt_trace_reflections(VkCommandBuffer cmd_buf, int bounce) BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_THROUGHPUT]); 
BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_GODRAYS_THROUGHPUT_DIST]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_ALBEDO]); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_METALLIC]); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_CLUSTER]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_METALLIC_A + frame_idx]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_CLUSTER_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VIEW_DEPTH_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_NORMAL_A + frame_idx]); diff --git a/src/refresh/vkpt/profiler.c b/src/refresh/vkpt/profiler.c index 02eaca26b..df45d3ead 100644 --- a/src/refresh/vkpt/profiler.c +++ b/src/refresh/vkpt/profiler.c @@ -149,11 +149,11 @@ draw_profiler(int enable_asvgf) PROFILER_DO(PROFILER_BVH_UPDATE, 1); PROFILER_DO(PROFILER_UPDATE_ENVIRONMENT, 1); PROFILER_DO(PROFILER_SHADOW_MAP, 1); - PROFILER_DO(PROFILER_ASVGF_GRADIENT_SAMPLES, 1); - PROFILER_DO(PROFILER_ASVGF_DO_GRADIENT_SAMPLES, 2); + PROFILER_DO(PROFILER_ASVGF_SEED_RNG, 1); PROFILER_DO(PROFILER_PRIMARY_RAYS, 1); if (cvar_pt_reflect_refract->integer > 0) { PROFILER_DO(PROFILER_REFLECT_REFRACT_1, 1); } if (cvar_pt_reflect_refract->integer > 1) { PROFILER_DO(PROFILER_REFLECT_REFRACT_2, 1); } + PROFILER_DO(PROFILER_ASVGF_GRADIENT_REPROJECT, 1); PROFILER_DO(PROFILER_DIRECT_LIGHTING, 1); PROFILER_DO(PROFILER_INDIRECT_LIGHTING, 1); PROFILER_DO(PROFILER_GOD_RAYS, 1); diff --git a/src/refresh/vkpt/shader/asvgf_atrous.comp b/src/refresh/vkpt/shader/asvgf_atrous.comp index 4cf478838..ae5b32de4 100644 --- a/src/refresh/vkpt/shader/asvgf_atrous.comp +++ b/src/refresh/vkpt/shader/asvgf_atrous.comp @@ -73,7 +73,7 @@ filter_image( vec3 normal_center = decode_normal(texelFetch(TEX_PT_NORMAL_A, ipos, 0).x); float depth_center = texelFetch(TEX_PT_VIEW_DEPTH_A, ipos, 0).x; float fwidth_depth = texelFetch(TEX_PT_MOTION, ipos, 0).w; - float roughness_center = texelFetch(TEX_PT_METALLIC, ipos, 0).y; + float roughness_center = 
texelFetch(TEX_PT_METALLIC_A, ipos, 0).y; float lum_mean_hf = 0; float sigma_l_hf = 0; @@ -152,7 +152,7 @@ filter_image( vec3 normal = decode_normal(texelFetch(TEX_PT_NORMAL_A, p, 0).x); float depth = texelFetch(TEX_PT_VIEW_DEPTH_A, p, 0).x; - float roughness = texelFetch(TEX_PT_METALLIC, p, 0).y; + float roughness = texelFetch(TEX_PT_METALLIC_A, p, 0).y; float dist_z = abs(depth_center - depth) * fwidth_depth * global_ubo.flt_atrous_depth; w *= exp(-dist_z / float(step_size)); @@ -316,7 +316,7 @@ main() filtered_spec /= STORAGE_SCALE_SPEC; vec3 normal = decode_normal(texelFetch(TEX_PT_NORMAL_A, ipos, 0).x); - vec2 metallic_roughness = texelFetch(TEX_PT_METALLIC, ipos, 0).rg; + vec2 metallic_roughness = texelFetch(TEX_PT_METALLIC_A, ipos, 0).rg; float checkerboard_flags = texelFetch(TEX_PT_VIEW_DIRECTION, ipos, 0).a; float metallic = metallic_roughness.x; float roughness = metallic_roughness.y; diff --git a/src/refresh/vkpt/shader/asvgf_fwd_project.comp b/src/refresh/vkpt/shader/asvgf_fwd_project.comp deleted file mode 100644 index cb19fa75d..000000000 --- a/src/refresh/vkpt/shader/asvgf_fwd_project.comp +++ /dev/null @@ -1,260 +0,0 @@ -/* -Copyright (C) 2018 Christoph Schied -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -// ========================================================================== // -// This shader projects surfaces from the previous frame onto the current frame. -// The goal is to have a single "gradient" pixel in every 3x3 square of the -// screen, and that pixel would map to a valid surface from the previous frame, -// including the random number sequence. The sequence is important to get the -// same lighting results as we got on the previous frame, unless something -// significant changed, like light occlusion or intensity. -// There is at least one case when gradient samples produce false invalidation: -// if a "static" (triangle) light smoothly moves between clusters. In this case, -// the per-cluster light lists change, and the path tracer gives different -// results even if the RNG sequences are the same. -// -// See `asvgf.glsl` for general information about denoisers in Q2RTX. -// ========================================================================== // - -#version 460 -#extension GL_GOOGLE_include_directive : enable -#extension GL_EXT_nonuniform_qualifier : enable - -layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; - -#include "utils.glsl" - -#define GLOBAL_UBO_DESC_SET_IDX 0 -#include "global_ubo.h" - -#define GLOBAL_TEXTURES_DESC_SET_IDX 1 -#include "global_textures.h" - -#define VERTEX_BUFFER_DESC_SET_IDX 2 -#include "vertex_buffer.h" - -#include "asvgf.glsl" -#include "read_visbuf.glsl" -#include "projection.glsl" - -void -encrypt_tea(inout uvec2 arg) -{ - const uint key[] = { - 0xa341316c, 0xc8013ea4, 0xad90777d, 0x7e95761e - }; - uint v0 = arg[0], v1 = arg[1]; - uint sum = 0; - uint delta = 0x9e3779b9; - - for(int i = 0; i < 16; i++) { // XXX rounds reduced, carefully check if good - //for(int i = 0; i < 32; i++) { - sum += delta; - v0 += ((v1 << 4) + key[0]) ^ (v1 + sum) ^ ((v1 >> 5) + key[1]); - v1 += ((v0 << 4) + key[2]) ^ (v0 + sum) ^ ((v0 >> 5) + key[3]); - } - arg[0] = v0; - arg[1] = v1; -} - -void -main() -{ - 
ivec2 ipos = ivec2(gl_GlobalInvocationID.xy * GRAD_DWN); - vec2 prev_lum; - { - // Find the brightest pixel in the stratum, but _not_ the same one as we used on the previous frame. - // Picking the brightest pixel helps prevent bright trails when the light has moved. - // If we just pick a random pixel in the the penumbra of the sun light for example, - // there is a high chance that this pixel will not receive any sun light due to random sampling of the sun. - // Overall, we'll miss the changing luminance of the moving penumbra, which is very well visible. - - // Pull the prev. frame sample position to make sure we're not using the same pixel. - // It's important because keeping the same random number sequence for more than one frame - // introduces a visible bias. - - uint prev_grad_sample_pos = texelFetch(TEX_ASVGF_GRAD_SMPL_POS_B, ivec2(gl_GlobalInvocationID.xy), 0).x; - - ivec2 prev_strata_pos = ivec2( - prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 0), - prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 1)) & STRATUM_OFFSET_MASK; - - // Go over all the HF and specular color values from the previous frame to find the brightest pixel. 
- - ivec2 arg_max = ivec2(0); - float lum_max = 0; - for(int yy = 0; yy < GRAD_DWN; yy++) - for(int xx = 0; xx < GRAD_DWN; xx++) - { - // Same as previous frame - skip - if(xx == prev_strata_pos.x && yy == prev_strata_pos.y) - continue; - - // Pull the colors - ivec2 p = ipos + ivec2(xx, yy); - vec3 prev_hf = unpackRGBE(texelFetch(TEX_PT_COLOR_HF, p, 0).x); - vec3 prev_spec = unpackRGBE(texelFetch(TEX_PT_COLOR_SPEC, p, 0).x); - vec2 lums = vec2(luminance(prev_hf), luminance(prev_spec)); - - // Use total luminance of diffuse ans specular as the heuristic - float lum_sum = lums.x + lums.y; - - if(lum_sum > lum_max) - { - lum_max = lum_sum; - arg_max = ivec2(xx, yy); - prev_lum = lums; - } - } - - - if(lum_max > 0) - { - // We found a suitable pixel - use it - - ipos += arg_max; - } - else - { - // We didn't find one - all pixels, maybe other than the previously used one, were black. - // Pick a random pixel in this case. - - uvec2 arg = uvec2(gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * global_ubo.prev_gpu_slice_width, - global_ubo.current_frame_idx); - encrypt_tea(arg); - arg %= GRAD_DWN; - - ipos += ivec2(arg); - vec3 prev_hf = unpackRGBE(texelFetch(TEX_PT_COLOR_HF, ipos, 0).x); - vec3 prev_spec = unpackRGBE(texelFetch(TEX_PT_COLOR_SPEC, ipos, 0).x); - prev_lum = vec2(luminance(prev_hf), luminance(prev_spec)); - } - } - - if(any(greaterThanEqual(ipos, ivec2(global_ubo.prev_gpu_slice_width, global_ubo.prev_height)))) - return; - - vec4 vis_buf = texelFetch(TEX_PT_VISBUF, ipos, 0); - if(any(lessThan(vis_buf.xy, vec2(0)))) - return; - - int checkerboard_offset = (ipos.x > global_ubo.prev_width / 2) ? 
global_ubo.width / 2 : 0; - - uint visbuf_instance_info = floatBitsToUint(vis_buf.z); - uint primitive_id = floatBitsToUint(vis_buf.w); - uint instance_id_prev, triangle_idx, instance_id_curr; - uint cluster_prev = ~0u, cluster_curr = ~0u; - unpack_instance_id_triangle_idx(visbuf_instance_info, instance_id_prev, triangle_idx); - - instance_id_curr = instance_id_prev; - - bool is_dynamic = false; - - if(visbuf_is_world_instance(visbuf_instance_info)) { - if(!visbuf_is_static_world_model(visbuf_instance_info)) { - instance_id_prev &= ~VISBUF_WORLD_INSTANCE_FLAG; - cluster_prev = instance_buffer.bsp_cluster_id_prev[instance_id_prev]; - - instance_id_curr = instance_buffer.world_prev_to_current[instance_id_prev]; - - // the object no longer exists - if(instance_id_curr == ~0u) - return; - - cluster_curr = instance_buffer.bsp_cluster_id[instance_id_curr]; - - uint buf_offset = instance_buffer.bsp_instance_buf_offset[instance_id_curr]; - primitive_id = buf_offset + triangle_idx; - is_dynamic = true; - - instance_id_curr |= VISBUF_WORLD_INSTANCE_FLAG; - } - } - else { - cluster_prev = instance_buffer.model_cluster_id_prev[instance_id_prev]; - - instance_id_curr = instance_buffer.model_prev_to_current[instance_id_prev]; - - // the object no longer exists - if(instance_id_curr == ~0u) - return; - - cluster_curr = instance_buffer.model_cluster_id[instance_id_curr]; - - uint buf_offset = instance_buffer.model_instance_buf_offset[instance_id_curr]; - primitive_id = buf_offset + triangle_idx; - is_dynamic = true; - } - - // the object moved between clusters - gradient sampling is pointless because light lists are different - if(cluster_curr != cluster_prev) - return; - - Triangle triangle; - if(is_dynamic) - triangle = get_instanced_triangle(primitive_id); - else - triangle = get_bsp_triangle(primitive_id); - - vec3 bary; - bary.yz = vis_buf.xy; - bary.x = 1.0 - vis_buf.x - vis_buf.y; - vec3 pos_ws = triangle.positions * bary; - - vec2 screen_pos_curr; - float distance_curr; - 
vec3 view_pos_curr = (global_ubo.V * vec4(pos_ws, 1.0)).xyz; - - if(!projection_view_to_screen(view_pos_curr, screen_pos_curr, distance_curr, false)) - { - return; - } - - /* pixel coordinate of forward projected sample */ - ivec2 ipos_curr = ivec2(screen_pos_curr * vec2(global_ubo.width / 2, global_ubo.height)) + ivec2(checkerboard_offset, 0); - - ivec2 pos_grad = ipos_curr / GRAD_DWN; - ivec2 pos_stratum = ipos_curr % GRAD_DWN; - - uint gradient_idx = - (1 << 31) /* mark sample as busy */ - | (pos_stratum.x << (STRATUM_OFFSET_SHIFT * 0)) /* encode pos in */ - | (pos_stratum.y << (STRATUM_OFFSET_SHIFT * 1)); /* current frame */ - - /* check if this sample is allowed to become a gradient sample */ - if(imageAtomicCompSwap(IMG_ASVGF_GRAD_SMPL_POS_A, pos_grad, 0u, gradient_idx) != 0) { - return; - } - - vec4 tex_gradients = texelFetch(TEX_PT_TEX_GRADIENTS, ipos, 0); - imageStore(IMG_ASVGF_TEX_GRADIENTS_FWD, pos_grad, tex_gradients); - - /* forward-project the rng seed */ - uint rng_prev = texelFetch(TEX_ASVGF_RNG_SEED_B, ipos, 0).x; - imageStore(IMG_ASVGF_RNG_SEED_A, ipos_curr, rng_prev.xxxx); - - /* forward-project the clip-space position for handling sub-pixel offsets */ - imageStore(IMG_ASVGF_POS_WS_FWD, pos_grad, vec4(pos_ws, 0.0)); - - vis_buf.z = uintBitsToFloat(pack_instance_id_triangle_idx(instance_id_curr, triangle_idx)); - vis_buf.w = uintBitsToFloat(primitive_id); - imageStore(IMG_ASVGF_VISBUF_FWD, pos_grad, vis_buf); - - imageStore(IMG_ASVGF_GRAD_HF_SPEC_PING, pos_grad, vec4(prev_lum, 0, 0)); -} diff --git a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp new file mode 100644 index 000000000..0a8d85a9b --- /dev/null +++ b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp @@ -0,0 +1,198 @@ +/* +Copyright (C) 2018 Christoph Schied +Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. 
+ +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#version 460 +#extension GL_GOOGLE_include_directive : enable +#extension GL_EXT_nonuniform_qualifier : enable + +layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; + +#include "utils.glsl" + +#define GLOBAL_UBO_DESC_SET_IDX 0 +#include "global_ubo.h" + +#define GLOBAL_TEXTURES_DESC_SET_IDX 1 +#include "global_textures.h" + +#define VERTEX_BUFFER_DESC_SET_IDX 2 +#include "vertex_buffer.h" + +#include "asvgf.glsl" +#include "read_visbuf.glsl" +#include "projection.glsl" + +void patch_position(ivec2 ipos, ivec2 found_pos_prev) +{ + vec4 vis_buf = texelFetch(TEX_PT_VISBUF_B, found_pos_prev, 0); + uint visbuf_instance_info = floatBitsToUint(vis_buf.z); + uint primitive_id = floatBitsToUint(vis_buf.w); + uint instance_id_prev, triangle_idx, instance_id_curr; + unpack_instance_id_triangle_idx(visbuf_instance_info, instance_id_prev, triangle_idx); + + instance_id_curr = instance_id_prev; + + bool is_dynamic = false; + + + if(visbuf_is_world_instance(visbuf_instance_info)) { + if(!visbuf_is_static_world_model(visbuf_instance_info)) { + instance_id_prev &= ~VISBUF_WORLD_INSTANCE_FLAG; + + instance_id_curr = instance_buffer.world_prev_to_current[instance_id_prev]; + + // the object no longer exists + if(instance_id_curr == ~0u) + return; + + uint buf_offset = 
instance_buffer.bsp_instance_buf_offset[instance_id_curr]; + primitive_id = buf_offset + triangle_idx; + is_dynamic = true; + + instance_id_curr |= VISBUF_WORLD_INSTANCE_FLAG; + } + } + else { + instance_id_curr = instance_buffer.model_prev_to_current[instance_id_prev]; + + // the object no longer exists + if(instance_id_curr == ~0u) + return; + + uint buf_offset = instance_buffer.model_instance_buf_offset[instance_id_curr]; + primitive_id = buf_offset + triangle_idx; + is_dynamic = true; + } + + Triangle triangle; + if(is_dynamic) + triangle = get_instanced_triangle(primitive_id); + else + triangle = get_bsp_triangle(primitive_id); + + vec3 bary; + bary.yz = vis_buf.xy; + bary.x = 1.0 - bary.y - bary.z; + + vec3 position = triangle.positions * bary; + + float materialId = imageLoad(IMG_PT_SHADING_POSITION, ipos).w; + imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(position, materialId)); + + uint checkerboard_flags = int(imageLoad(IMG_PT_VIEW_DIRECTION, ipos).w); + if (checkerboard_flags == CHECKERBOARD_FLAG_PRIMARY) + { + vec3 view_direction = normalize(position - global_ubo.cam_pos.xyz); + imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(view_direction, checkerboard_flags)); + } +} + +void +main() +{ + ivec2 pos_grad = ivec2(gl_GlobalInvocationID); + ivec2 ipos = pos_grad * GRAD_DWN; + + bool found = false; + ivec2 found_offset = ivec2(0); + ivec2 found_pos_prev = ivec2(0); + vec2 found_prev_lum = vec2(0); + + int field_left = 0; + int field_right = global_ubo.prev_width / 2; + if (ipos.x >= global_ubo.width / 2) + { + field_left = field_right; + field_right = global_ubo.prev_width; + } + + for(int offy = 0; offy < GRAD_DWN; offy++) + { + for(int offx = 0; offx < GRAD_DWN; offx++) + { + ivec2 p = ipos + ivec2(offx, offy); + + vec4 motion = texelFetch(TEX_PT_MOTION, p, 0); + vec2 pos_prev = ((vec2(p) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); + ivec2 pp = 
ivec2(floor(pos_prev)); + + if(pp.x < field_left || pp.x >= field_right || pp.y >= global_ubo.prev_height) + continue; + + + ivec2 pos_grad_prev = pp / GRAD_DWN; + + uint prev_grad_sample_pos = texelFetch(TEX_ASVGF_GRAD_SMPL_POS_B, pp / GRAD_DWN, 0).x; + ivec2 stratum_prev = ivec2( + prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 0), + prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 1)) & STRATUM_OFFSET_MASK; + + if(all(equal(pos_grad_prev * GRAD_DWN + stratum_prev, pp))) + continue; + + uint cluster_curr = texelFetch(TEX_PT_CLUSTER_A, p, 0).x; + uint cluster_prev = texelFetch(TEX_PT_CLUSTER_B, pp, 0).x; + float depth_curr = texelFetch(TEX_PT_VIEW_DEPTH_A, p, 0).x; + float depth_prev = texelFetch(TEX_PT_VIEW_DEPTH_B, pp, 0).x; + vec3 geo_normal_curr = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, p, 0).x); + vec3 geo_normal_prev = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_B, pp, 0).x); + + float dist_depth = abs(depth_curr - depth_prev + motion.z) / abs(depth_curr); + float dot_geo_normals = dot(geo_normal_curr, geo_normal_prev); + + if(cluster_curr == cluster_prev && dist_depth < 0.1 && dot_geo_normals > 0.9) + { + vec3 prev_hf = unpackRGBE(texelFetch(TEX_PT_COLOR_HF, pp, 0).x); + vec3 prev_spec = unpackRGBE(texelFetch(TEX_PT_COLOR_SPEC, pp, 0).x); + vec2 prev_lum = vec2(luminance(prev_hf), luminance(prev_spec)); + + if(prev_lum.x > found_prev_lum.x) + { + found_prev_lum = prev_lum; + found_offset = ivec2(offx, offy); + found_pos_prev = pp; + found = true; + } + } + } + } + + if (!found) + { + imageStore(IMG_ASVGF_GRAD_SMPL_POS_A, pos_grad, uvec4(0)); + return; + } + + ipos += found_offset; + + uint gradient_idx = + (1 << 31) /* mark sample as busy */ + | (found_offset.x << (STRATUM_OFFSET_SHIFT * 0)) /* encode pos in */ + | (found_offset.y << (STRATUM_OFFSET_SHIFT * 1)); /* current frame */ + + imageStore(IMG_ASVGF_GRAD_SMPL_POS_A, pos_grad, uvec4(gradient_idx)); + + imageStore(IMG_ASVGF_GRAD_HF_SPEC_PING, pos_grad, vec4(found_prev_lum, 0, 0)); + + 
imageStore(IMG_ASVGF_RNG_SEED_A, ipos, texelFetch(TEX_ASVGF_RNG_SEED_B, found_pos_prev, 0)); + imageStore(IMG_PT_NORMAL_A, ipos, texelFetch(TEX_PT_NORMAL_B, found_pos_prev, 0)); + imageStore(IMG_PT_METALLIC_A, ipos, texelFetch(TEX_PT_METALLIC_B, found_pos_prev, 0)); + + patch_position(ipos, found_pos_prev); +} diff --git a/src/refresh/vkpt/shader/asvgf_temporal.comp b/src/refresh/vkpt/shader/asvgf_temporal.comp index fd3ff6d19..a88981a20 100644 --- a/src/refresh/vkpt/shader/asvgf_temporal.comp +++ b/src/refresh/vkpt/shader/asvgf_temporal.comp @@ -156,7 +156,7 @@ main() float lum_curr_hf; get_shared_data(ivec2(0), depth_curr, normal_curr, lum_curr_hf); - vec2 metal_rough = texelFetch(TEX_PT_METALLIC, ipos, 0).xy; + vec2 metal_rough = texelFetch(TEX_PT_METALLIC_A, ipos, 0).xy; float shininess = clamp(2.0 / square(square(metal_rough.y)) - 2.0, 0.0, 32.0); vec3 geo_normal_curr = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos, 0).x); diff --git a/src/refresh/vkpt/shader/compositing.comp b/src/refresh/vkpt/shader/compositing.comp index 8e0d219f6..f28e99f59 100644 --- a/src/refresh/vkpt/shader/compositing.comp +++ b/src/refresh/vkpt/shader/compositing.comp @@ -45,7 +45,7 @@ main() // Load the surface parameters vec3 normal = decode_normal(texelFetch(TEX_PT_NORMAL_A, ipos, 0).x); - vec2 metallic_roughness = texelFetch(TEX_PT_METALLIC, ipos, 0).rg; + vec2 metallic_roughness = texelFetch(TEX_PT_METALLIC_A, ipos, 0).rg; bool is_checkerboard_material = texelFetch(TEX_PT_VIEW_DIRECTION, ipos, 0).a == 2.0; float metallic = metallic_roughness.x; diff --git a/src/refresh/vkpt/shader/direct_lighting.rgen b/src/refresh/vkpt/shader/direct_lighting.rgen index c0ac423e1..b52edd4cf 100644 --- a/src/refresh/vkpt/shader/direct_lighting.rgen +++ b/src/refresh/vkpt/shader/direct_lighting.rgen @@ -56,9 +56,10 @@ direct_lighting(ivec2 ipos, bool is_odd_checkerboard, out vec3 high_freq, out ve vec4 view_direction = texelFetch(TEX_PT_VIEW_DIRECTION, ipos, 0); vec3 normal = 
decode_normal(texelFetch(TEX_PT_NORMAL_A, ipos, 0).x); vec3 geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos, 0).x); - float primary_roughness = texelFetch(TEX_PT_METALLIC, ipos, 0).y; - float primary_specular = texelFetch(TEX_PT_ALBEDO, ipos, 0).a; - uint cluster_idx = texelFetch(TEX_PT_CLUSTER, ipos, 0).x; + vec3 metal_rough_spec = texelFetch(TEX_PT_METALLIC_A, ipos, 0).xyz; + float primary_roughness = metal_rough_spec.y; + float primary_specular = metal_rough_spec.z; + uint cluster_idx = texelFetch(TEX_PT_CLUSTER_A, ipos, 0).x; if(cluster_idx == 0xffff) cluster_idx = ~0u; // because the image is uint16 bool primary_is_weapon = (material_id & MATERIAL_FLAG_WEAPON) != 0; diff --git a/src/refresh/vkpt/shader/global_textures.h b/src/refresh/vkpt/shader/global_textures.h index ae24ab402..1a0b8f0c1 100644 --- a/src/refresh/vkpt/shader/global_textures.h +++ b/src/refresh/vkpt/shader/global_textures.h @@ -34,96 +34,101 @@ with this program; if not, write to the Free Software Foundation, Inc., /* These are images that are to be used as render targets and buffers, but not textures. 
*/ #define LIST_IMAGES \ - IMG_DO(PT_VISBUF, 0, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_ALBEDO, 1, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_METALLIC, 2, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_MOTION, 3, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_TRANSPARENT, 4, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_TEX_GRADIENTS, 5, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_HF, 6, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_ATROUS_PING_LF_SH, 7, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_ATROUS_PONG_LF_SH, 8, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_ATROUS_PING_LF_COCG, 9, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_ATROUS_PONG_LF_COCG, 10, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_ATROUS_PING_HF, 11, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_ATROUS_PONG_HF, 12, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_ATROUS_PING_SPEC, 13, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_ATROUS_PONG_SPEC, 14, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_ATROUS_PING_MOMENTS, 15, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_ATROUS_PONG_MOMENTS, 16, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_COLOR, 17, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TEX_GRADIENTS_FWD, 18, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_POS_WS_FWD, 19, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_VISBUF_FWD, 20, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_LF_PING, 21, R16G16_SFLOAT, rg16f, 
IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_LF_PONG, 22, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_HF_SPEC_PING, 23, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_HF_SPEC_PONG, 24, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(PT_SHADING_POSITION, 25, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(FLAT_COLOR, 26, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(FLAT_MOTION, 27, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_GODRAYS_THROUGHPUT_DIST,28, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(BLOOM_DOWNSCALE_MIP_1, 29, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 2, qvk.extent_taa.height / 2 ) \ - IMG_DO(BLOOM_HBLUR, 30, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ - IMG_DO(BLOOM_VBLUR, 31, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ - IMG_DO(TAA_OUTPUT, 32, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(PT_VIEW_DIRECTION, 33, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DIRECTION2, 34, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_THROUGHPUT, 35, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_BOUNCE_THROUGHPUT, 36, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(HQ_COLOR_INTERLEAVED, 37, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_COLOR_LF_SH, 38, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_COLOR_LF_COCG, 39, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_COLOR_HF, 40, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_COLOR_SPEC, 41, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL2, 42, R32_UINT, r32ui, IMG_WIDTH_MGPU, 
IMG_HEIGHT ) \ - IMG_DO(PT_CLUSTER, 43, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_ALBEDO, 0, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_MOTION, 1, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_TRANSPARENT, 2, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_HF, 3, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_ATROUS_PING_LF_SH, 4, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_ATROUS_PONG_LF_SH, 5, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_ATROUS_PING_LF_COCG, 6, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_ATROUS_PONG_LF_COCG, 7, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_ATROUS_PING_HF, 8, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_ATROUS_PONG_HF, 9, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_ATROUS_PING_SPEC, 10, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_ATROUS_PONG_SPEC, 11, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_ATROUS_PING_MOMENTS, 12, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_ATROUS_PONG_MOMENTS, 13, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_COLOR, 14, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_GRAD_LF_PING, 15, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_LF_PONG, 16, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_HF_SPEC_PING, 17, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_HF_SPEC_PONG, 18, R16G16_SFLOAT, rg16f, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(PT_SHADING_POSITION, 19, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(FLAT_COLOR, 20, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, 
IMG_HEIGHT ) \ + IMG_DO(FLAT_MOTION, 21, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_GODRAYS_THROUGHPUT_DIST,22, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(BLOOM_DOWNSCALE_MIP_1, 23, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 2, qvk.extent_taa.height / 2 ) \ + IMG_DO(BLOOM_HBLUR, 24, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ + IMG_DO(BLOOM_VBLUR, 25, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ + IMG_DO(TAA_OUTPUT, 26, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(PT_VIEW_DIRECTION, 27, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DIRECTION2, 28, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_THROUGHPUT, 29, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_BOUNCE_THROUGHPUT, 30, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(HQ_COLOR_INTERLEAVED, 31, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_COLOR_LF_SH, 32, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_COLOR_LF_COCG, 33, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_COLOR_HF, 34, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_COLOR_SPEC, 35, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL2, 36, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ #define LIST_IMAGES_A_B \ - IMG_DO(PT_VIEW_DEPTH_A, 44, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_B, 45, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_A, 46, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_B, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_A, 48, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_B, 49, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - 
IMG_DO(ASVGF_FILTERED_SPEC_A, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_B, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_TAA_B, 55, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_RNG_SEED_A, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_RNG_SEED_B, 57, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 58, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 59, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,60, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,61, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_A, 62, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_B, 63, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - -#define LIST_IMAGES_B_A \ - IMG_DO(PT_VIEW_DEPTH_B, 44, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_A, 45, R32_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_A, 37, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_B, 38, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_A, 39, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_B, 40, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_A, 41, R8G8B8A8_UNORM, rgba8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_B, 42, R8G8B8A8_UNORM, rgba8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + 
IMG_DO(PT_VIEW_DEPTH_A, 43, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_B, 44, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_A, 45, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_NORMAL_B, 46, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_A, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_A, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_GEO_NORMAL_B, 48, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_A, 49, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_A, 49, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_FILTERED_SPEC_B, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_TAA_A, 55, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_RNG_SEED_A, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_B, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_RNG_SEED_A, 57, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 58, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 59, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, 
IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,59, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,60, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,61, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_A, 61, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ IMG_DO(ASVGF_GRAD_SMPL_POS_B, 62, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_A, 63, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ -#define NUM_IMAGES 64 /* this really sucks but I don't know how to fix it +#define LIST_IMAGES_B_A \ + IMG_DO(PT_VISBUF_B, 37, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_A, 38, R32G32B32A32_SFLOAT, rgba32f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_B, 39, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_A, 40, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_B, 41, R8G8B8A8_UNORM, rgba8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_A, 42, R8G8B8A8_UNORM, rgba8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_B, 43, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_A, 44, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_B, 45, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_A, 46, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_B, 47, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_A, 48, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_B, 49, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_A, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT 
) \ + IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_RNG_SEED_B, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_RNG_SEED_A, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 58, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,59, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,60, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_B, 61, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_A, 62, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + +#define NUM_IMAGES 63 /* this really sucks but I don't know how to fix it counting with enum does not work in GLSL */ // todo: make naming consistent! 
diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 73698ee4a..e229c3d2b 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -48,7 +48,7 @@ with this program; if not, write to the Free Software Foundation, Inc., UBO_CVAR_DO(flt_atrous_normal_spec, 1) \ UBO_CVAR_DO(flt_enable, 1) /* switch for the entire SVGF reconstruction, 0 or 1 */ \ UBO_CVAR_DO(flt_fixed_albedo, 0) /* if nonzero, replaces surface albedo with that value after filtering */ \ - UBO_CVAR_DO(flt_grad_transparent, 0.3) /* gradient scale for reflections and refractions, [0..1] */ \ + UBO_CVAR_DO(flt_grad_transparent, 1.0) /* gradient scale for reflections and refractions, [0..1] */ \ UBO_CVAR_DO(flt_min_alpha_color_hf, 0.02) /* minimum weight for the new frame data, color channel, (0..1] */ \ UBO_CVAR_DO(flt_min_alpha_color_lf, 0.01) \ UBO_CVAR_DO(flt_min_alpha_color_spec, 0.01) \ diff --git a/src/refresh/vkpt/shader/indirect_lighting.rgen b/src/refresh/vkpt/shader/indirect_lighting.rgen index 1b295c079..9f8d11804 100644 --- a/src/refresh/vkpt/shader/indirect_lighting.rgen +++ b/src/refresh/vkpt/shader/indirect_lighting.rgen @@ -82,8 +82,9 @@ indirect_lighting( if(!half_res) { - primary_roughness = texelFetch(TEX_PT_METALLIC, ipos, 0).y; - primary_specular = texelFetch(TEX_PT_ALBEDO, ipos, 0).a; + vec3 metal_rough_spec = texelFetch(TEX_PT_METALLIC_A, ipos, 0).xyz; + primary_roughness = metal_rough_spec.y; + primary_specular = metal_rough_spec.z; } } else diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index b459f816a..87b5ca180 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -103,8 +103,6 @@ main() bool is_odd_checkerboard = (gl_LaunchIDNV.z != 0) || (push_constants.gpu_index == 1); - bool is_gradient = get_is_gradient(ipos); - rng_seed = texelFetch(TEX_ASVGF_RNG_SEED_A, ipos, 0).r; vec3 position; vec3 
direction; @@ -119,7 +117,7 @@ main() else { // Real-time mode - use predictable sampling for TAAU - pixel_offset = is_gradient ? global_ubo.prev_sub_pixel_jitter : global_ubo.sub_pixel_jitter; + pixel_offset = global_ubo.sub_pixel_jitter; } const ivec2 image_position = get_image_position(); @@ -128,18 +126,6 @@ main() Ray ray = get_primary_ray(inUV); - if(is_gradient) - { - /* gradient samples only need to verify visibility but need to - * maintain the precise location */ - vec3 pos_ws = texelFetch(TEX_ASVGF_POS_WS_FWD, ipos / GRAD_DWN, 0).xyz; - ray.origin = global_ubo.cam_pos.xyz; - ray.direction = pos_ws - ray.origin; - float len = length(ray.direction); - ray.direction /= len; - ray.t_max = len - 0.1; - } - bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.width / 4, global_ubo.height / 2))); if(is_readback_pixel) { @@ -161,7 +147,7 @@ main() // If the primary ray didn't hit anything, or it hit a sky polygon and pt_show_sky is disabled, // store the sky color and motion vectors. Doesn't apply to gradient samples because their rays intentionally miss. 
- if((!found_intersection(ray_payload_brdf) || (is_sky(ray_payload_brdf) && (global_ubo.pt_show_sky == 0))) && !is_gradient) + if((!found_intersection(ray_payload_brdf) || (is_sky(ray_payload_brdf) && (global_ubo.pt_show_sky == 0)))) { vec3 env = env_map(ray.direction, false); env *= global_ubo.pt_env_scale; @@ -189,10 +175,9 @@ main() imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, 0)); imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(global_ubo.cam_pos.xyz + direction * PRIMARY_RAY_T_MAX, 0)); imageStore(IMG_PT_MOTION, ipos, vec4(motion, 0, 0)); - imageStore(IMG_PT_VISBUF, ipos, vec4(-1, -1, uintBitsToFloat(uvec2(~0u)))); + imageStore(IMG_PT_VISBUF_A, ipos, vec4(-1, -1, uintBitsToFloat(uvec2(~0u)))); imageStore(IMG_PT_ALBEDO, ipos, vec4(0)); imageStore(IMG_PT_TRANSPARENT, ipos, transparent); - imageStore(IMG_PT_TEX_GRADIENTS, ipos, vec4(0)); return; } @@ -200,41 +185,18 @@ main() vec3 bary; { - bool is_dynamic_primitive = false; - uint primitive_id = 0; - vec4 vis_buf; - - /* reprojection was valid for the gradient sample */ - if(is_gradient && !found_intersection(ray_payload_brdf)) { - vis_buf = texelFetch(TEX_ASVGF_VISBUF_FWD, ipos / GRAD_DWN, 0); - - bary.yz = vis_buf.xy; - bary.x = 1.0 - bary.y - bary.z; - - uint visbuf_instance_info = floatBitsToUint(vis_buf.z); - is_dynamic_primitive = visbuf_is_world_instance(visbuf_instance_info) ? !visbuf_is_static_world_model(visbuf_instance_info) : true; - - primitive_id = floatBitsToUint(vis_buf.w); - } - else - { - is_dynamic_primitive = is_dynamic_instance(ray_payload_brdf); - primitive_id = get_primitive(ray_payload_brdf); - bary = get_hit_barycentric(ray_payload_brdf); - - if(is_gradient) { /* gradient sample became occluded, mask out */ - imageStore(IMG_ASVGF_GRAD_SMPL_POS_A, ipos / GRAD_DWN, uvec4(0)); - } - is_gradient = false; - - uint visbuf_instance_info = is_dynamic_primitive ? 
get_instance_id_instanced(primitive_id) : VISBUF_STATIC_GEOMETRY; + bool is_dynamic_primitive = is_dynamic_instance(ray_payload_brdf); + uint primitive_id = get_primitive(ray_payload_brdf); + bary = get_hit_barycentric(ray_payload_brdf); + + uint visbuf_instance_info = is_dynamic_primitive ? get_instance_id_instanced(primitive_id) : VISBUF_STATIC_GEOMETRY; - vis_buf.xy = bary.yz; - vis_buf.z = uintBitsToFloat(visbuf_instance_info); - vis_buf.w = uintBitsToFloat(primitive_id); - } + vec4 vis_buf; + vis_buf.xy = bary.yz; + vis_buf.z = uintBitsToFloat(visbuf_instance_info); + vis_buf.w = uintBitsToFloat(primitive_id); - imageStore(IMG_PT_VISBUF, ipos, vis_buf); + imageStore(IMG_PT_VISBUF_A, ipos, vis_buf); if(is_dynamic_primitive) triangle = get_instanced_triangle(primitive_id); @@ -297,15 +259,6 @@ main() // Compute angle between adjacent rays using approximate acos(dot(...)), assume horizontal angle == vertical angle float footprint_size_over_distance = sqrt(max(0, 2.0 - 2.0 * dot(ray_x.direction, ray_0.direction))); - if(is_gradient) - { - vec4 fwd_gradients = texelFetch(TEX_ASVGF_TEX_GRADIENTS_FWD, ipos / GRAD_DWN, 0); - tex_coord_x = fwd_gradients.xy; - tex_coord_y = fwd_gradients.zw; - } - - imageStore(IMG_PT_TEX_GRADIENTS, ipos, vec4(tex_coord_x.xy, tex_coord_y.xy)); - vec3 primary_albedo = vec3(1); float primary_metallic = 0; float primary_specular = 0; @@ -439,9 +392,9 @@ main() imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, float(checkerboard_flags))); imageStore(IMG_PT_THROUGHPUT, ipos, vec4(throughput, distance_curr)); imageStore(IMG_PT_BOUNCE_THROUGHPUT, ipos, vec4(1, 1, 1, footprint_size_over_distance)); - imageStore(IMG_PT_CLUSTER, ipos, ivec4(triangle.cluster)); + imageStore(IMG_PT_CLUSTER_A, ipos, ivec4(triangle.cluster)); imageStore(IMG_PT_ALBEDO, ipos, vec4(primary_albedo, primary_specular)); - imageStore(IMG_PT_METALLIC, ipos, vec4(primary_metallic, primary_roughness, 0, 0)); + imageStore(IMG_PT_METALLIC_A, ipos, vec4(primary_metallic, 
primary_roughness, primary_specular, 0)); imageStore(IMG_PT_GODRAYS_THROUGHPUT_DIST, ipos, vec4(1, 1, 1, distance_curr)); // Debug visualization of the PVS (Potentially Visible Set) @@ -463,7 +416,7 @@ main() } } - if(global_ubo.pt_show_sky != 0 && is_sky(ray_payload_brdf) && !is_gradient) + if(global_ubo.pt_show_sky != 0 && is_sky(ray_payload_brdf)) { // show additional information about sky boxes: triangle edges... if(any(lessThan(bary, vec3(0.02)))) @@ -480,8 +433,7 @@ main() } } - // If it's a valid gradient sample, the primary ray is a miss and has no hit_distance. - float hit_distance = is_gradient ? ray.t_max : ray_payload_brdf.hit_distance; + float hit_distance = ray_payload_brdf.hit_distance; if(ray_payload_brdf.max_transparent_distance <= hit_distance) { diff --git a/src/refresh/vkpt/shader/reflect_refract.rgen b/src/refresh/vkpt/shader/reflect_refract.rgen index 29b82d72e..99cff6464 100644 --- a/src/refresh/vkpt/shader/reflect_refract.rgen +++ b/src/refresh/vkpt/shader/reflect_refract.rgen @@ -103,7 +103,7 @@ main() vec3 primary_albedo = imageLoad(IMG_PT_ALBEDO, ipos).rgb; vec3 geo_normal = decode_normal(imageLoad(IMG_PT_GEO_NORMAL_A, ipos).x); vec3 normal = decode_normal(imageLoad(IMG_PT_NORMAL_A, ipos).x); - uint cluster_idx = imageLoad(IMG_PT_CLUSTER, ipos).r; + uint cluster_idx = imageLoad(IMG_PT_CLUSTER_A, ipos).r; int primary_medium = int(material_id & MATERIAL_LIGHT_STYLE_MASK) >> MATERIAL_LIGHT_STYLE_SHIFT; bool primary_is_weapon = (material_id & MATERIAL_FLAG_WEAPON) != 0; @@ -498,7 +498,7 @@ main() imageStore(IMG_PT_NORMAL_A, ipos, uvec4(0)); imageStore(IMG_PT_GEO_NORMAL_A, ipos, uvec4(0)); imageStore(IMG_PT_ALBEDO, ipos, vec4(0)); - imageStore(IMG_PT_METALLIC, ipos, vec4(0)); + imageStore(IMG_PT_METALLIC_A, ipos, vec4(0)); imageStore(IMG_PT_TRANSPARENT, ipos, transparent); imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(position + direction * PRIMARY_RAY_T_MAX, uintBitsToFloat(material_id))); imageStore(IMG_PT_VIEW_DIRECTION, ipos, 
vec4(direction, float(checkerboard_flags))); @@ -691,6 +691,17 @@ main() transparent = alpha_blend_premultiplied(transparent, vec4(primary_emissive * throughput, 0)); } + bool is_dynamic_primitive = is_dynamic_instance(ray_payload_brdf); + uint primitive_id = get_primitive(ray_payload_brdf); + uint visbuf_instance_info = is_dynamic_primitive ? get_instance_id_instanced(primitive_id) : VISBUF_STATIC_GEOMETRY; + + vec4 vis_buf; + vis_buf.xy = bary.yz; + vis_buf.z = uintBitsToFloat(visbuf_instance_info); + vis_buf.w = uintBitsToFloat(primitive_id); + + imageStore(IMG_PT_VISBUF_A, ipos, vis_buf); + // Store the surface parameters into the G-buffer for the indirect lighting shader if(is_camera(material_id)) { @@ -711,7 +722,7 @@ main() imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, float(checkerboard_flags))); imageStore(IMG_PT_THROUGHPUT, ipos, vec4(throughput, optical_path_length)); imageStore(IMG_PT_TRANSPARENT, ipos, transparent); - imageStore(IMG_PT_CLUSTER, ipos, uvec4(cluster_idx)); + imageStore(IMG_PT_CLUSTER_A, ipos, uvec4(cluster_idx)); imageStore(IMG_PT_ALBEDO, ipos, vec4(primary_albedo, primary_specular)); - imageStore(IMG_PT_METALLIC, ipos, vec4(primary_metallic, primary_roughness, 0, 0)); + imageStore(IMG_PT_METALLIC_A, ipos, vec4(primary_metallic, primary_roughness, primary_specular, 0)); } diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 41123d2e1..9a9156010 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -86,9 +86,9 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_INSTANCE_GEOMETRY_COMP) \ SHADER_MODULE_DO(QVK_MOD_ANIMATE_MATERIALS_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_SEED_RNG_COMP) \ - SHADER_MODULE_DO(QVK_MOD_ASVGF_FWD_PROJECT_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_IMG_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_ATROUS_COMP) \ + SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_REPROJECT_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_ATROUS_COMP) \ 
SHADER_MODULE_DO(QVK_MOD_ASVGF_LF_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_TEMPORAL_COMP) \ @@ -440,11 +440,11 @@ void create_orthographic_matrix(float matrix[16], float xmin, float xmax, PROFILER_DO(PROFILER_FRAME_TIME, 0) \ PROFILER_DO(PROFILER_INSTANCE_GEOMETRY, 1) \ PROFILER_DO(PROFILER_BVH_UPDATE, 1) \ - PROFILER_DO(PROFILER_ASVGF_GRADIENT_SAMPLES, 1) \ - PROFILER_DO(PROFILER_ASVGF_DO_GRADIENT_SAMPLES, 2) \ + PROFILER_DO(PROFILER_ASVGF_SEED_RNG, 1) \ PROFILER_DO(PROFILER_PRIMARY_RAYS, 1) \ PROFILER_DO(PROFILER_REFLECT_REFRACT_1, 1) \ PROFILER_DO(PROFILER_REFLECT_REFRACT_2, 1) \ + PROFILER_DO(PROFILER_ASVGF_GRADIENT_REPROJECT, 1) \ PROFILER_DO(PROFILER_DIRECT_LIGHTING, 1) \ PROFILER_DO(PROFILER_INDIRECT_LIGHTING, 1) \ PROFILER_DO(PROFILER_ASVGF_FULL, 1) \ @@ -620,7 +620,8 @@ VkResult vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf); VkResult vkpt_compositing(VkCommandBuffer cmd_buf); VkResult vkpt_interleave(VkCommandBuffer cmd_buf); VkResult vkpt_taa(VkCommandBuffer cmd_buf); -VkResult vkpt_asvgf_create_gradient_samples(VkCommandBuffer cmd_buf, uint32_t frame_num, int do_gradient_samples); +VkResult vkpt_asvgf_seed_rng(VkCommandBuffer cmd_buf); +VkResult vkpt_asvgf_gradient_reproject(VkCommandBuffer cmd_buf); VkResult vkpt_bloom_initialize(); VkResult vkpt_bloom_destroy(); From 285df051f1460a1b315dc482097a55eddb9df66c Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 19 Nov 2020 06:42:49 -0800 Subject: [PATCH 18/48] Rolled the asvgf_seed_rng shader into primary_rays. 
--- src/CMakeLists.txt | 1 - src/refresh/vkpt/asvgf.c | 101 -------------------- src/refresh/vkpt/main.c | 4 - src/refresh/vkpt/path_tracer.c | 1 + src/refresh/vkpt/profiler.c | 1 - src/refresh/vkpt/shader/asvgf_seed_rng.comp | 60 ------------ src/refresh/vkpt/shader/primary_rays.rgen | 17 +++- src/refresh/vkpt/vkpt.h | 3 - 8 files changed, 17 insertions(+), 171 deletions(-) delete mode 100644 src/refresh/vkpt/shader/asvgf_seed_rng.comp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f78c1f9f9..6fa2ddcd7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -313,7 +313,6 @@ set(SRC_SHADERS refresh/vkpt/shader/asvgf_gradient_img.comp refresh/vkpt/shader/asvgf_gradient_reproject.comp refresh/vkpt/shader/asvgf_lf.comp - refresh/vkpt/shader/asvgf_seed_rng.comp refresh/vkpt/shader/asvgf_taa.comp refresh/vkpt/shader/asvgf_taau.comp refresh/vkpt/shader/asvgf_temporal.comp diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index 147672b78..5c8c10ebe 100644 --- a/src/refresh/vkpt/asvgf.c +++ b/src/refresh/vkpt/asvgf.c @@ -20,7 +20,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "vkpt.h" enum { - SEED_RNG, GRADIENT_IMAGE, GRADIENT_ATROUS, GRADIENT_REPROJECT, @@ -112,11 +111,6 @@ vkpt_asvgf_create_pipelines() }; VkComputePipelineCreateInfo pipeline_info[ASVGF_NUM_PIPELINES] = { - [SEED_RNG] = { - .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = SHADER_STAGE(QVK_MOD_ASVGF_SEED_RNG_COMP, VK_SHADER_STAGE_COMPUTE_BIT), - .layout = pipeline_layout_atrous, - }, [GRADIENT_IMAGE] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE(QVK_MOD_ASVGF_GRADIENT_IMG_COMP, VK_SHADER_STAGE_COMPUTE_BIT), @@ -216,101 +210,6 @@ vkpt_asvgf_destroy_pipelines() ); \ } while(0) -#define BARRIER_TO_CLEAR(cmd_buf, img) \ - do { \ - VkImageSubresourceRange subresource_range = { \ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, \ - .baseMipLevel = 0, \ - .levelCount = 1, \ - .baseArrayLayer = 
0, \ - .layerCount = 1 \ - }; \ - IMAGE_BARRIER(cmd_buf, \ - .image = img, \ - .subresourceRange = subresource_range, \ - .srcAccessMask = 0, \ - .dstAccessMask = 0, \ - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, \ - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, \ - ); \ - } while(0) - -#define BARRIER_FROM_CLEAR(cmd_buf, img) \ - do { \ - VkImageSubresourceRange subresource_range = { \ - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, \ - .baseMipLevel = 0, \ - .levelCount = 1, \ - .baseArrayLayer = 0, \ - .layerCount = 1 \ - }; \ - IMAGE_BARRIER(cmd_buf, \ - .image = img, \ - .subresourceRange = subresource_range, \ - .srcAccessMask = 0, \ - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, \ - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, \ - .newLayout = VK_IMAGE_LAYOUT_GENERAL, \ - ); \ - } while(0) - - -VkResult -vkpt_asvgf_seed_rng(VkCommandBuffer cmd_buf) -{ - VkDescriptorSet desc_sets[] = { - qvk.desc_set_ubo, - qvk_get_current_desc_set_textures(), - qvk.desc_set_vertex_buffer - }; - VkClearColorValue clear_grd_smpl_pos = { - .uint32 = { 0, 0, 0, 0 } - }; - - VkImageSubresourceRange subresource_range = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1 - }; - - int current_sample_pos_image = VKPT_IMG_ASVGF_GRAD_SMPL_POS_A + (qvk.frame_counter & 1); - - BARRIER_TO_CLEAR(cmd_buf, qvk.images[current_sample_pos_image]); - vkCmdClearColorImage(cmd_buf, qvk.images[current_sample_pos_image], - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_grd_smpl_pos, 1, &subresource_range); - BARRIER_FROM_CLEAR(cmd_buf, qvk.images[current_sample_pos_image]); - - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + (qvk.frame_counter & 1)]); - - for(uint32_t gpu = 0; gpu < qvk.device_count; gpu++) - { - set_current_gpu(cmd_buf, gpu); - - uint32_t push_constants[1] = { - gpu - }; - - vkCmdPushConstants(cmd_buf, pipeline_layout_atrous, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), 
push_constants); - - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[SEED_RNG]); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_layout_general, 0, LENGTH(desc_sets), desc_sets, 0, 0); - vkCmdDispatch(cmd_buf, - (qvk.gpu_slice_width + 15) / 16, - (qvk.extent_render.height + 15) / 16, - 1); - } - - set_current_gpu(cmd_buf, ALL_GPUS); - - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + (qvk.frame_counter & 1)]); - - return VK_SUCCESS; -} - VkResult vkpt_asvgf_gradient_reproject(VkCommandBuffer cmd_buf) { diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 76e7dac8c..d2629e94a 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -2436,10 +2436,6 @@ R_RenderFrame_RTX(refdef_t *fd) vkpt_instance_geometry(trace_cmd_buf, upload_info.num_instances, update_world_animations); END_PERF_MARKER(trace_cmd_buf, PROFILER_INSTANCE_GEOMETRY); - BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_SEED_RNG); - vkpt_asvgf_seed_rng(trace_cmd_buf); - END_PERF_MARKER(trace_cmd_buf, PROFILER_ASVGF_SEED_RNG); - BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_BVH_UPDATE); assert(upload_info.num_vertices % 3 == 0); build_transparency_blas(trace_cmd_buf); diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index 01db89f35..c017e6df9 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -1055,6 +1055,7 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_CLUSTER_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VIEW_DEPTH_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_NORMAL_A + frame_idx]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + frame_idx]); return VK_SUCCESS; } diff --git a/src/refresh/vkpt/profiler.c b/src/refresh/vkpt/profiler.c index df45d3ead..850ac2d5d 100644 --- a/src/refresh/vkpt/profiler.c +++ b/src/refresh/vkpt/profiler.c @@ 
-149,7 +149,6 @@ draw_profiler(int enable_asvgf) PROFILER_DO(PROFILER_BVH_UPDATE, 1); PROFILER_DO(PROFILER_UPDATE_ENVIRONMENT, 1); PROFILER_DO(PROFILER_SHADOW_MAP, 1); - PROFILER_DO(PROFILER_ASVGF_SEED_RNG, 1); PROFILER_DO(PROFILER_PRIMARY_RAYS, 1); if (cvar_pt_reflect_refract->integer > 0) { PROFILER_DO(PROFILER_REFLECT_REFRACT_1, 1); } if (cvar_pt_reflect_refract->integer > 1) { PROFILER_DO(PROFILER_REFLECT_REFRACT_2, 1); } diff --git a/src/refresh/vkpt/shader/asvgf_seed_rng.comp b/src/refresh/vkpt/shader/asvgf_seed_rng.comp deleted file mode 100644 index 3b36f3226..000000000 --- a/src/refresh/vkpt/shader/asvgf_seed_rng.comp +++ /dev/null @@ -1,60 +0,0 @@ -/* -Copyright (C) 2018 Christoph Schied -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -// ========================================================================== // -// Computes the RNG seed numbers for every pixel on the screen. 
-// ========================================================================== // - -#version 460 -#extension GL_GOOGLE_include_directive : enable -#extension GL_EXT_nonuniform_qualifier : enable - -layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; - -layout(push_constant, std140) uniform MgpuInfo { - uint gpu_index; -} push; - -#define GLOBAL_UBO_DESC_SET_IDX 0 -#include "global_ubo.h" - -#define GLOBAL_TEXTURES_DESC_SET_IDX 1 -#include "global_textures.h" - -void -main() -{ - ivec2 ipos = ivec2(gl_GlobalInvocationID); - int frame_num = global_ubo.current_frame_idx; - - // We want different RNG's on the two checkerboard fields. - uint checkerboard = (push.gpu_index & 1); - if(ipos.x >= global_ubo.width / 2) - checkerboard = 1; - - uint rng_seed = 0; - - uint frame_offset = frame_num / NUM_BLUE_NOISE_TEX; - - rng_seed |= (uint(ipos.x + frame_offset) % BLUE_NOISE_RES) << 0u; - rng_seed |= (uint(ipos.y + (frame_offset << 4)) % BLUE_NOISE_RES) << 10u; - rng_seed |= uint(frame_num + checkerboard) << 20; - - imageStore(IMG_ASVGF_RNG_SEED_A, ipos, uvec4(rng_seed)); -} diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index 87b5ca180..7e3c667a0 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -94,6 +94,20 @@ get_primary_ray(vec2 screen_pos) return ray; } +void generate_rng_seed(ivec2 ipos, bool is_odd_checkerboard) +{ + int frame_num = global_ubo.current_frame_idx; + + uint frame_offset = frame_num / NUM_BLUE_NOISE_TEX; + + rng_seed = 0; + rng_seed |= (uint(ipos.x + frame_offset) % BLUE_NOISE_RES) << 0u; + rng_seed |= (uint(ipos.y + (frame_offset << 4)) % BLUE_NOISE_RES) << 10u; + rng_seed |= uint(frame_num + uint(is_odd_checkerboard)) << 20; + + imageStore(IMG_ASVGF_RNG_SEED_A, ipos, uvec4(rng_seed)); +} + void main() { @@ -103,7 +117,8 @@ main() bool is_odd_checkerboard = (gl_LaunchIDNV.z != 0) || (push_constants.gpu_index == 1); - rng_seed = 
texelFetch(TEX_ASVGF_RNG_SEED_A, ipos, 0).r; + generate_rng_seed(ipos, is_odd_checkerboard); + vec3 position; vec3 direction; diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 9a9156010..2f1285675 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -85,7 +85,6 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_SPRITE_RAHIT) \ SHADER_MODULE_DO(QVK_MOD_INSTANCE_GEOMETRY_COMP) \ SHADER_MODULE_DO(QVK_MOD_ANIMATE_MATERIALS_COMP) \ - SHADER_MODULE_DO(QVK_MOD_ASVGF_SEED_RNG_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_IMG_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_ATROUS_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_REPROJECT_COMP) \ @@ -440,7 +439,6 @@ void create_orthographic_matrix(float matrix[16], float xmin, float xmax, PROFILER_DO(PROFILER_FRAME_TIME, 0) \ PROFILER_DO(PROFILER_INSTANCE_GEOMETRY, 1) \ PROFILER_DO(PROFILER_BVH_UPDATE, 1) \ - PROFILER_DO(PROFILER_ASVGF_SEED_RNG, 1) \ PROFILER_DO(PROFILER_PRIMARY_RAYS, 1) \ PROFILER_DO(PROFILER_REFLECT_REFRACT_1, 1) \ PROFILER_DO(PROFILER_REFLECT_REFRACT_2, 1) \ @@ -620,7 +618,6 @@ VkResult vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf); VkResult vkpt_compositing(VkCommandBuffer cmd_buf); VkResult vkpt_interleave(VkCommandBuffer cmd_buf); VkResult vkpt_taa(VkCommandBuffer cmd_buf); -VkResult vkpt_asvgf_seed_rng(VkCommandBuffer cmd_buf); VkResult vkpt_asvgf_gradient_reproject(VkCommandBuffer cmd_buf); VkResult vkpt_bloom_initialize(); From 46207198c2248914567c6d72135bbecee6b13770 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 19 Nov 2020 07:22:20 -0800 Subject: [PATCH 19/48] Optimized the gradient reprojection shader by going over the pixels in the 3x3 square in separate threads. 
--- src/refresh/vkpt/asvgf.c | 6 +- .../vkpt/shader/asvgf_gradient_reproject.comp | 114 +++++++++++------- 2 files changed, 77 insertions(+), 43 deletions(-) diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index 5c8c10ebe..fed1526f4 100644 --- a/src/refresh/vkpt/asvgf.c +++ b/src/refresh/vkpt/asvgf.c @@ -224,9 +224,11 @@ vkpt_asvgf_gradient_reproject(VkCommandBuffer cmd_buf) vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[GRADIENT_REPROJECT]); vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_general, 0, LENGTH(desc_sets), desc_sets, 0, 0); + + uint32_t group_size_pixels = 24; // matches GROUP_SIZE_PIXELS in asvgf_gradient_reproject.comp vkCmdDispatch(cmd_buf, - (qvk.gpu_slice_width_prev / GRAD_DWN + 15) / 16, - (qvk.extent_render_prev.height / GRAD_DWN + 15) / 16, + (qvk.gpu_slice_width + group_size_pixels - 1) / group_size_pixels, + (qvk.extent_render.height + group_size_pixels - 1) / group_size_pixels, 1); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_RNG_SEED_A + (qvk.frame_counter & 1)]); diff --git a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp index 0a8d85a9b..6ed511a03 100644 --- a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp +++ b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp @@ -21,8 +21,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_nonuniform_qualifier : enable -layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; - #include "utils.glsl" #define GLOBAL_UBO_DESC_SET_IDX 0 @@ -38,6 +36,12 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; #include "read_visbuf.glsl" #include "projection.glsl" +// optimal group size determined experimentally on a 3090 +#define GROUP_SIZE_GRAD 8 +#define GROUP_SIZE_PIXELS (GROUP_SIZE_GRAD*GRAD_DWN) + +layout(local_size_x = 
GROUP_SIZE_PIXELS, local_size_y = GROUP_SIZE_PIXELS, local_size_z = 1) in; + void patch_position(ivec2 ipos, ivec2 found_pos_prev) { vec4 vis_buf = texelFetch(TEX_PT_VISBUF_B, found_pos_prev, 0); @@ -103,11 +107,55 @@ void patch_position(ivec2 ipos, ivec2 found_pos_prev) } } +shared vec4 s_reprojected_pixels[GROUP_SIZE_PIXELS][GROUP_SIZE_PIXELS]; + +void reproject_pixel(ivec2 p, int field_left, int field_right) +{ + ivec2 local_pos = ivec2(gl_LocalInvocationID); + s_reprojected_pixels[local_pos.y][local_pos.x] = vec4(0); + + vec4 motion = texelFetch(TEX_PT_MOTION, p, 0); + vec2 pos_prev = ((vec2(p) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); + ivec2 pp = ivec2(floor(pos_prev)); + + if(pp.x < field_left || pp.x >= field_right || pp.y >= global_ubo.prev_height) + return; + + ivec2 pos_grad_prev = pp / GRAD_DWN; + + uint prev_grad_sample_pos = texelFetch(TEX_ASVGF_GRAD_SMPL_POS_B, pp / GRAD_DWN, 0).x; + ivec2 stratum_prev = ivec2( + prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 0), + prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 1)) & STRATUM_OFFSET_MASK; + + if(all(equal(pos_grad_prev * GRAD_DWN + stratum_prev, pp))) + return; + + uint cluster_curr = texelFetch(TEX_PT_CLUSTER_A, p, 0).x; + uint cluster_prev = texelFetch(TEX_PT_CLUSTER_B, pp, 0).x; + float depth_curr = texelFetch(TEX_PT_VIEW_DEPTH_A, p, 0).x; + float depth_prev = texelFetch(TEX_PT_VIEW_DEPTH_B, pp, 0).x; + vec3 geo_normal_curr = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, p, 0).x); + vec3 geo_normal_prev = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_B, pp, 0).x); + + float dist_depth = abs(depth_curr - depth_prev + motion.z) / abs(depth_curr); + float dot_geo_normals = dot(geo_normal_curr, geo_normal_prev); + + if(cluster_curr == cluster_prev && dist_depth < 0.1 && dot_geo_normals > 0.9) + { + vec3 prev_hf = unpackRGBE(texelFetch(TEX_PT_COLOR_HF, pp, 0).x); + vec3 prev_spec = 
unpackRGBE(texelFetch(TEX_PT_COLOR_SPEC, pp, 0).x); + vec2 prev_lum = vec2(luminance(prev_hf), luminance(prev_spec)); + + s_reprojected_pixels[local_pos.y][local_pos.x] = vec4(pp, prev_lum); + } +} + void main() { - ivec2 pos_grad = ivec2(gl_GlobalInvocationID); - ivec2 ipos = pos_grad * GRAD_DWN; + ivec2 ipos = ivec2(gl_GlobalInvocationID); + ivec2 pos_grad = ipos / GRAD_DWN; bool found = false; ivec2 found_offset = ivec2(0); @@ -122,53 +170,37 @@ main() field_right = global_ubo.prev_width; } - for(int offy = 0; offy < GRAD_DWN; offy++) - { - for(int offx = 0; offx < GRAD_DWN; offx++) - { - ivec2 p = ipos + ivec2(offx, offy); + reproject_pixel(ipos, field_left, field_right); - vec4 motion = texelFetch(TEX_PT_MOTION, p, 0); - vec2 pos_prev = ((vec2(p) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); - ivec2 pp = ivec2(floor(pos_prev)); + barrier(); - if(pp.x < field_left || pp.x >= field_right || pp.y >= global_ubo.prev_height) - continue; + ivec2 local_pos; + local_pos.x = int(gl_LocalInvocationIndex) % GROUP_SIZE_GRAD; + local_pos.y = int(gl_LocalInvocationIndex) / GROUP_SIZE_GRAD; - ivec2 pos_grad_prev = pp / GRAD_DWN; + if(local_pos.y >= GROUP_SIZE_GRAD) + return; - uint prev_grad_sample_pos = texelFetch(TEX_ASVGF_GRAD_SMPL_POS_B, pp / GRAD_DWN, 0).x; - ivec2 stratum_prev = ivec2( - prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 0), - prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 1)) & STRATUM_OFFSET_MASK; + pos_grad = ivec2(gl_WorkGroupID) * GROUP_SIZE_GRAD + local_pos; + ipos = pos_grad * GRAD_DWN; - if(all(equal(pos_grad_prev * GRAD_DWN + stratum_prev, pp))) - continue; + for(int offy = 0; offy < GRAD_DWN; offy++) + { + for(int offx = 0; offx < GRAD_DWN; offx++) + { + ivec2 p = local_pos * GRAD_DWN + ivec2(offx, offy); - uint cluster_curr = texelFetch(TEX_PT_CLUSTER_A, p, 0).x; - uint cluster_prev = texelFetch(TEX_PT_CLUSTER_B, pp, 0).x; - float depth_curr = 
texelFetch(TEX_PT_VIEW_DEPTH_A, p, 0).x; - float depth_prev = texelFetch(TEX_PT_VIEW_DEPTH_B, pp, 0).x; - vec3 geo_normal_curr = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, p, 0).x); - vec3 geo_normal_prev = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_B, pp, 0).x); + vec4 reprojected_pixel = s_reprojected_pixels[p.y][p.x]; - float dist_depth = abs(depth_curr - depth_prev + motion.z) / abs(depth_curr); - float dot_geo_normals = dot(geo_normal_curr, geo_normal_prev); + vec2 prev_lum = reprojected_pixel.zw; - if(cluster_curr == cluster_prev && dist_depth < 0.1 && dot_geo_normals > 0.9) + if(prev_lum.x > found_prev_lum.x) { - vec3 prev_hf = unpackRGBE(texelFetch(TEX_PT_COLOR_HF, pp, 0).x); - vec3 prev_spec = unpackRGBE(texelFetch(TEX_PT_COLOR_SPEC, pp, 0).x); - vec2 prev_lum = vec2(luminance(prev_hf), luminance(prev_spec)); - - if(prev_lum.x > found_prev_lum.x) - { - found_prev_lum = prev_lum; - found_offset = ivec2(offx, offy); - found_pos_prev = pp; - found = true; - } + found_prev_lum = prev_lum; + found_offset = ivec2(offx, offy); + found_pos_prev = ivec2(reprojected_pixel.xy); + found = true; } } } From e79d5dcde45c8630d57407fe728f002a4c31e30d Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 19 Nov 2020 07:53:38 -0800 Subject: [PATCH 20/48] Added some comments to the gradient reprojection shader. --- .../vkpt/shader/asvgf_gradient_reproject.comp | 77 ++++++++++++++++--- 1 file changed, 65 insertions(+), 12 deletions(-) diff --git a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp index 6ed511a03..afe6337d7 100644 --- a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp +++ b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp @@ -17,6 +17,18 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ +// ========================================================================== // +// This shader tries to match the surfaces from the current frame with the +// previous frame. In every 3x3 square, a single matching pixel is selected. +// This pixel becomes a "gradient" sample for the lighting passes. Gradient +// samples are shaded using the previous frame's random number sequence and +// important surface parameters like normal and roughness. The goal is to +// compare the lighting environment for such pixels between the current and +// previous frames. +// +// See `asvgf.glsl` for general information about denoisers in Q2RTX. +// ========================================================================== // + #version 460 #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_nonuniform_qualifier : enable @@ -34,7 +46,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "asvgf.glsl" #include "read_visbuf.glsl" -#include "projection.glsl" // optimal group size determined experimentally on a 3090 #define GROUP_SIZE_GRAD 8 @@ -42,8 +53,14 @@ with this program; if not, write to the Free Software Foundation, Inc., layout(local_size_x = GROUP_SIZE_PIXELS, local_size_y = GROUP_SIZE_PIXELS, local_size_z = 1) in; +// Using the visibility buffer, compute the new position of the surface that we found +// in the previous frame. Exact matching of gradient surface positions is important +// to avoid false positive gradients in the light penumbras, where a subpixel shift +// that normally happens between frames can make a difference between a light sample +// being visible or shadowed. 
void patch_position(ivec2 ipos, ivec2 found_pos_prev) { + // Read the visibility buffer vec4 vis_buf = texelFetch(TEX_PT_VISBUF_B, found_pos_prev, 0); uint visbuf_instance_info = floatBitsToUint(vis_buf.z); uint primitive_id = floatBitsToUint(vis_buf.w); @@ -54,9 +71,11 @@ void patch_position(ivec2 ipos, ivec2 found_pos_prev) bool is_dynamic = false; - - if(visbuf_is_world_instance(visbuf_instance_info)) { - if(!visbuf_is_static_world_model(visbuf_instance_info)) { + // Map the dynamic objects geometry data from the previous frame into the current frame + if(visbuf_is_world_instance(visbuf_instance_info)) + { + if(!visbuf_is_static_world_model(visbuf_instance_info)) + { instance_id_prev &= ~VISBUF_WORLD_INSTANCE_FLAG; instance_id_curr = instance_buffer.world_prev_to_current[instance_id_prev]; @@ -72,10 +91,11 @@ void patch_position(ivec2 ipos, ivec2 found_pos_prev) instance_id_curr |= VISBUF_WORLD_INSTANCE_FLAG; } } - else { + else + { instance_id_curr = instance_buffer.model_prev_to_current[instance_id_prev]; - // the object no longer exists + // the object no longer exists if(instance_id_curr == ~0u) return; @@ -84,6 +104,7 @@ void patch_position(ivec2 ipos, ivec2 found_pos_prev) is_dynamic = true; } + // Load the triangle data Triangle triangle; if(is_dynamic) triangle = get_instanced_triangle(primitive_id); @@ -94,11 +115,17 @@ void patch_position(ivec2 ipos, ivec2 found_pos_prev) bary.yz = vis_buf.xy; bary.x = 1.0 - bary.y - bary.z; + // Reconstruct the position based on the barycentrics vec3 position = triangle.positions * bary; float materialId = imageLoad(IMG_PT_SHADING_POSITION, ipos).w; imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(position, materialId)); + // For primary surfaces, i.e. not reflections or refractions, + // reconstruct the true view direction based on the exact position. 
+ // The view direction is especially important for shiny materials + // where an indirect specular ray can hit a different object given + // a view direction with a subpixel offset. uint checkerboard_flags = int(imageLoad(IMG_PT_VIEW_DIRECTION, ipos).w); if (checkerboard_flags == CHECKERBOARD_FLAG_PRIMARY) { @@ -109,11 +136,17 @@ void patch_position(ivec2 ipos, ivec2 found_pos_prev) shared vec4 s_reprojected_pixels[GROUP_SIZE_PIXELS][GROUP_SIZE_PIXELS]; +// For a given pixel p, find its surface in the previous frame based on the motion vector. +// If such a surface exists - based on depth and normal similarity - get the old diffuse +// and specular luminances that will be used to compute gradients later. void reproject_pixel(ivec2 p, int field_left, int field_right) { ivec2 local_pos = ivec2(gl_LocalInvocationID); + + // Initialize the shared memory unconditionally s_reprojected_pixels[local_pos.y][local_pos.x] = vec4(0); + // Compute the previous frame position of this surface vec4 motion = texelFetch(TEX_PT_MOTION, p, 0); vec2 pos_prev = ((vec2(p) + vec2(0.5)) * vec2(global_ubo.inv_width * 2, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width / 2, global_ubo.prev_height); ivec2 pp = ivec2(floor(pos_prev)); @@ -121,6 +154,7 @@ void reproject_pixel(ivec2 p, int field_left, int field_right) if(pp.x < field_left || pp.x >= field_right || pp.y >= global_ubo.prev_height) return; + // Fetch the previous frame gradient position... ivec2 pos_grad_prev = pp / GRAD_DWN; uint prev_grad_sample_pos = texelFetch(TEX_ASVGF_GRAD_SMPL_POS_B, pp / GRAD_DWN, 0).x; @@ -128,9 +162,14 @@ void reproject_pixel(ivec2 p, int field_left, int field_right) prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 0), prev_grad_sample_pos >> (STRATUM_OFFSET_SHIFT * 1)) & STRATUM_OFFSET_MASK; + // If this pixel was a gradient on the previous frame, don't use it. Two reasons: + // 1) Carrying forward the same random number sequence over multiple frames introduces bias.
+ // 2) Gradient pixels use light lists from the previous frame. If the same pixel was used + // as a gradient for more than one frame, we would need to keep the light lists from 2+ frames behind. if(all(equal(pos_grad_prev * GRAD_DWN + stratum_prev, pp))) return; + // Load the data for surface matching uint cluster_curr = texelFetch(TEX_PT_CLUSTER_A, p, 0).x; uint cluster_prev = texelFetch(TEX_PT_CLUSTER_B, pp, 0).x; float depth_curr = texelFetch(TEX_PT_VIEW_DEPTH_A, p, 0).x; @@ -141,12 +180,14 @@ void reproject_pixel(ivec2 p, int field_left, int field_right) float dist_depth = abs(depth_curr - depth_prev + motion.z) / abs(depth_curr); float dot_geo_normals = dot(geo_normal_curr, geo_normal_prev); + // Compare the surfaces if(cluster_curr == cluster_prev && dist_depth < 0.1 && dot_geo_normals > 0.9) { vec3 prev_hf = unpackRGBE(texelFetch(TEX_PT_COLOR_HF, pp, 0).x); vec3 prev_spec = unpackRGBE(texelFetch(TEX_PT_COLOR_SPEC, pp, 0).x); vec2 prev_lum = vec2(luminance(prev_hf), luminance(prev_spec)); + // Store the results into shared memory: previous frame position and luminances s_reprojected_pixels[local_pos.y][local_pos.x] = vec4(pp, prev_lum); } } @@ -154,14 +195,11 @@ void reproject_pixel(ivec2 p, int field_left, int field_right) void main() { + // First pass: the entire thread group is busy matching pixels with the previous frame + ivec2 ipos = ivec2(gl_GlobalInvocationID); ivec2 pos_grad = ipos / GRAD_DWN; - bool found = false; - ivec2 found_offset = ivec2(0); - ivec2 found_pos_prev = ivec2(0); - vec2 found_prev_lum = vec2(0); - int field_left = 0; int field_right = global_ubo.prev_width / 2; if (ipos.x >= global_ubo.width / 2) @@ -174,6 +212,13 @@ main() barrier(); + // Second pass: the first (GROUP_SIZE_GRAD)^2 pixels are looking for the brightest + // matching pixels in each 3x3 square. + + // Picking the brightest pixel helps prevent bright trails when the light has moved. 
+ // If we just pick a random pixel in the penumbra of the sun light for example, + // there is a high chance that this pixel will not receive any sun light due to random sampling of the sun. + // Overall, we'll miss the changing luminance of the moving penumbra, which is very well visible. ivec2 local_pos; local_pos.x = int(gl_LocalInvocationIndex) % GROUP_SIZE_GRAD; @@ -185,6 +230,11 @@ main() pos_grad = ivec2(gl_WorkGroupID) * GROUP_SIZE_GRAD + local_pos; ipos = pos_grad * GRAD_DWN; + bool found = false; + ivec2 found_offset = ivec2(0); + ivec2 found_pos_prev = ivec2(0); + vec2 found_prev_lum = vec2(0); + for(int offy = 0; offy < GRAD_DWN; offy++) { for(int offx = 0; offx < GRAD_DWN; offx++) @@ -195,7 +245,8 @@ main() vec2 prev_lum = reprojected_pixel.zw; - if(prev_lum.x > found_prev_lum.x) + // Use total luminance of diffuse and specular as the heuristic + if(prev_lum.x + prev_lum.y > found_prev_lum.x + found_prev_lum.y) { found_prev_lum = prev_lum; found_offset = ivec2(offx, offy); @@ -211,6 +262,8 @@ main() return; } + // Final pass: store the gradient information and patch the surface parameters + ipos += found_offset; uint gradient_idx = From 4ce4c78c839ca1744d5cf07596d09ae3c6f43819 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 20 Nov 2020 06:18:48 -0800 Subject: [PATCH 21/48] Optimized the TAAU shader by preloading the input color data into shared memory and doing PQEncode on it during preload. Note: this version will not work when input is larger than output, i.e. resolution scale over 100%.
--- src/refresh/vkpt/shader/asvgf_taau.comp | 85 +++++++++++++++++++++---- 1 file changed, 72 insertions(+), 13 deletions(-) diff --git a/src/refresh/vkpt/shader/asvgf_taau.comp b/src/refresh/vkpt/shader/asvgf_taau.comp index 9cb58d31a..b18a988f9 100644 --- a/src/refresh/vkpt/shader/asvgf_taau.comp +++ b/src/refresh/vkpt/shader/asvgf_taau.comp @@ -27,7 +27,11 @@ with this program; if not, write to the Free Software Foundation, Inc., #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_nonuniform_qualifier : enable -layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; +#define GROUP_SIZE 16 +#define FILTER_RADIUS 1 +#define SHARED_SIZE (GROUP_SIZE + FILTER_RADIUS * 3) + +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in; #define GLOBAL_UBO_DESC_SET_IDX 0 #include "global_ubo.h" @@ -42,6 +46,8 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; #include "utils.glsl" #include "asvgf.glsl" +shared uvec2 s_color_pq[SHARED_SIZE][SHARED_SIZE]; + const float pq_m1 = 0.1593017578125; const float pq_m2 = 78.84375; const float pq_c1 = 0.8359375; @@ -69,19 +75,55 @@ vec3 PQEncode(vec3 image) return clamp(image, vec3(0), vec3(1)); } -void get_moments(ivec2 ipos, int r, out vec3 mom1, out vec3 mom2) +// Preload the color data into shared memory, convert to PQ space +void preload(ivec2 group_base, ivec2 group_size) +{ + ivec2 preload_size = min(group_size + ivec2(FILTER_RADIUS * 3), SHARED_SIZE); + + for(uint linear_idx = gl_LocalInvocationIndex; linear_idx < preload_size.x * preload_size.y; linear_idx += GROUP_SIZE * GROUP_SIZE) + { + // Convert the linear index to 2D index in a (preload_size x preload_size) virtual group + float t = (float(linear_idx) + 0.5) / float(preload_size.x); + int xx = int(floor(fract(t) * float(preload_size.x))); + int yy = int(floor(t)); + + // Load + ivec2 ipos = group_base + ivec2(xx, yy) - ivec2(FILTER_RADIUS); + ipos = clamp(ipos, ivec2(0), ivec2(global_ubo.width - 1, 
global_ubo.height - 1)); + vec4 color = texelFetch(TEX_FLAT_COLOR, ipos, 0); + vec3 color_pq = PQEncode(color.rgb); + + // Store + s_color_pq[yy][xx] = packHalf4x16(vec4(color_pq, color.a)); + } +} + +void get_shared_data(ivec2 pos, ivec2 group_base, out vec3 color_pq, out int checkerboard_flags) +{ + ivec2 addr = pos - group_base + ivec2(FILTER_RADIUS); + + vec4 data = unpackHalf4x16(s_color_pq[addr.y][addr.x]); + + color_pq = data.rgb; + checkerboard_flags = int(data.a); +} + +void get_moments(ivec2 pos, ivec2 group_base, int r, out vec3 mom1, out vec3 mom2) { mom1 = vec3(0.0); mom2 = vec3(0.0); - for(int yy = -r; yy <= r; yy++) { - for(int xx = -r; xx <= r; xx++) { + for(int yy = -r; yy <= r; yy++) + { + for(int xx = -r; xx <= r; xx++) + { if(xx == 0 && yy == 0) continue; - ivec2 p = ipos + ivec2(xx, yy); - vec3 c = texelFetch(TEX_FLAT_COLOR, p, 0).rgb; - c = PQEncode(c); + ivec2 p = pos + ivec2(xx, yy); + vec3 c; + int checkerboard_flags; + get_shared_data(p, group_base, c, checkerboard_flags); mom1 += c.rgb; mom2 += c.rgb * c.rgb; @@ -94,11 +136,26 @@ float get_sample_weight(vec2 delta, float scale) return clamp(1 - scale * dot(delta, delta), 0, 1); } +vec2 hires_to_lores(ivec2 ipos) +{ + vec2 scale = vec2(global_ubo.width * global_ubo.inv_unscaled_width, global_ubo.height * global_ubo.inv_unscaled_height); + + return (vec2(ipos) + vec2(0.5)) * scale - vec2(0.5) - global_ubo.sub_pixel_jitter; +} + void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); + ivec2 group_base_hires = ivec2(gl_WorkGroupID) * GROUP_SIZE; + ivec2 group_base_lores = ivec2(hires_to_lores(group_base_hires)); + ivec2 group_bottomright_hires = ivec2(gl_WorkGroupID) * GROUP_SIZE + ivec2(GROUP_SIZE - 1); + ivec2 group_bottomright_lores = ivec2(hires_to_lores(group_bottomright_hires)); + + preload(group_base_lores, group_bottomright_lores - group_base_lores + ivec2(1)); + barrier(); + if (ipos.x >= global_ubo.unscaled_width || ipos.y >= global_ubo.unscaled_height) { 
imageStore(IMG_TAA_OUTPUT, ipos, vec4(0)); @@ -106,16 +163,16 @@ main() } // Calculate position in the render buffer (at the lower render resolution) - vec2 nearest_render_pos = vec2(ipos.x + 0.5f, ipos.y + 0.5f) * vec2(global_ubo.width * global_ubo.inv_unscaled_width, global_ubo.height * global_ubo.inv_unscaled_height) - global_ubo.sub_pixel_jitter - vec2(0.5f); + vec2 nearest_render_pos = hires_to_lores(ipos); ivec2 int_render_pos = ivec2(round(nearest_render_pos.x), round(nearest_render_pos.y)); int_render_pos = clamp(int_render_pos, ivec2(0), ivec2(global_ubo.width - 1, global_ubo.height - 1)); - vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, int_render_pos, 0); - vec3 color_center = PQEncode(linear_color_center.rgb); - int checkerboard_flags = int(linear_color_center.a); + vec3 color_center; + int checkerboard_flags; + get_shared_data(int_render_pos, group_base_lores, color_center, checkerboard_flags); vec3 color_output = color_center; - vec3 linear_color_output = linear_color_center.rgb; + vec3 linear_color_output; if(global_ubo.flt_taa != 0) { @@ -127,7 +184,7 @@ main() int num_pix; // Obtain the color moments for the surrounding pixels. - get_moments(int_render_pos, 1, mom1, mom2); + get_moments(int_render_pos, group_base_lores, FILTER_RADIUS, mom1, mom2); num_pix = 9; // Remove or reduce sparkles by clamping the color of the center pixel to its surroundings @@ -208,6 +265,8 @@ main() // The frame is supposed to be static (paused), so no motion vectors or high quality sampling. // The accumulator is an RGBA32_FLOAT texture for higher accuracy. + linear_color_output = PQDecode(color_output); + if(global_ubo.temporal_blend_factor < 1) { vec3 prev_color = imageLoad(IMG_HQ_COLOR_INTERLEAVED, ipos).rgb; From e12a0727854db7b32c831c76b4084d70ed199dd2 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 20 Nov 2020 06:26:49 -0800 Subject: [PATCH 22/48] Optimized the TAAU shader some more by also preloading the motion vectors into shared memory. 
--- src/refresh/vkpt/shader/asvgf_taau.comp | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/refresh/vkpt/shader/asvgf_taau.comp b/src/refresh/vkpt/shader/asvgf_taau.comp index b18a988f9..bc7cbd029 100644 --- a/src/refresh/vkpt/shader/asvgf_taau.comp +++ b/src/refresh/vkpt/shader/asvgf_taau.comp @@ -47,6 +47,7 @@ layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) i #include "asvgf.glsl" shared uvec2 s_color_pq[SHARED_SIZE][SHARED_SIZE]; +shared uint s_motion[SHARED_SIZE][SHARED_SIZE]; const float pq_m1 = 0.1593017578125; const float pq_m2 = 78.84375; @@ -76,6 +77,7 @@ vec3 PQEncode(vec3 image) } // Preload the color data into shared memory, convert to PQ space +// Also preload the 2D motion vectors void preload(ivec2 group_base, ivec2 group_size) { ivec2 preload_size = min(group_size + ivec2(FILTER_RADIUS * 3), SHARED_SIZE); @@ -92,13 +94,15 @@ void preload(ivec2 group_base, ivec2 group_size) ipos = clamp(ipos, ivec2(0), ivec2(global_ubo.width - 1, global_ubo.height - 1)); vec4 color = texelFetch(TEX_FLAT_COLOR, ipos, 0); vec3 color_pq = PQEncode(color.rgb); + vec2 motion = texelFetch(TEX_FLAT_MOTION, ipos, 0).xy; // Store s_color_pq[yy][xx] = packHalf4x16(vec4(color_pq, color.a)); + s_motion[yy][xx] = packHalf2x16(motion); } } -void get_shared_data(ivec2 pos, ivec2 group_base, out vec3 color_pq, out int checkerboard_flags) +void get_shared_color(ivec2 pos, ivec2 group_base, out vec3 color_pq, out int checkerboard_flags) { ivec2 addr = pos - group_base + ivec2(FILTER_RADIUS); @@ -108,6 +112,13 @@ void get_shared_data(ivec2 pos, ivec2 group_base, out vec3 color_pq, out int che checkerboard_flags = int(data.a); } +vec2 get_shared_motion(ivec2 pos, ivec2 group_base) +{ + ivec2 addr = pos - group_base + ivec2(FILTER_RADIUS); + + return unpackHalf2x16(s_motion[addr.y][addr.x]); +} + void get_moments(ivec2 pos, ivec2 group_base, int r, out vec3 mom1, out vec3 mom2) { mom1 = vec3(0.0); @@ -123,7 +134,7 
@@ void get_moments(ivec2 pos, ivec2 group_base, int r, out vec3 mom1, out vec3 mom ivec2 p = pos + ivec2(xx, yy); vec3 c; int checkerboard_flags; - get_shared_data(p, group_base, c, checkerboard_flags); + get_shared_color(p, group_base, c, checkerboard_flags); mom1 += c.rgb; mom2 += c.rgb * c.rgb; @@ -169,7 +180,7 @@ main() vec3 color_center; int checkerboard_flags; - get_shared_data(int_render_pos, group_base_lores, color_center, checkerboard_flags); + get_shared_color(int_render_pos, group_base_lores, color_center, checkerboard_flags); vec3 color_output = color_center; vec3 linear_color_output; @@ -210,7 +221,7 @@ main() for(int yy = -r; yy <= r; yy++) { for(int xx = -r; xx <= r; xx++) { ivec2 p = int_render_pos + ivec2(xx, yy); - vec2 m = texelFetch(TEX_FLAT_MOTION, p, 0).xy; + vec2 m = get_shared_motion(p, group_base_lores); float l = dot(m, m); if(l > len) { len = l; From f98111ed19f768599ddb30c1f895d2a094eae62c Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 20 Nov 2020 08:51:58 -0800 Subject: [PATCH 23/48] Implemented dynamic switching between TAA and TAAU depending on the resolution scale: TAAU is used when scale is <= 100%, TAA otherwise. Both are processed by the same shader now, so the old TAA shader is removed. Also made the handling of these modes and various resolutions more uniform, and fixed some issues like tone mapping differences between the modes. 
--- src/CMakeLists.txt | 1 - src/refresh/vkpt/asvgf.c | 31 +-- src/refresh/vkpt/bloom.c | 6 +- src/refresh/vkpt/draw.c | 8 +- src/refresh/vkpt/main.c | 62 +++-- src/refresh/vkpt/shader/asvgf_taa.comp | 214 ------------------ src/refresh/vkpt/shader/asvgf_taau.comp | 28 ++- src/refresh/vkpt/shader/bloom_blur.comp | 4 +- src/refresh/vkpt/shader/bloom_composite.comp | 4 +- src/refresh/vkpt/shader/constants.h | 7 +- src/refresh/vkpt/shader/global_textures.h | 19 +- src/refresh/vkpt/shader/global_ubo.h | 26 +-- src/refresh/vkpt/shader/primary_rays.rgen | 2 +- .../vkpt/shader/tone_mapping_apply.comp | 4 +- .../vkpt/shader/tone_mapping_histogram.comp | 4 +- src/refresh/vkpt/tone_mapping.c | 12 +- src/refresh/vkpt/vkpt.h | 6 +- 17 files changed, 112 insertions(+), 326 deletions(-) delete mode 100644 src/refresh/vkpt/shader/asvgf_taa.comp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6fa2ddcd7..684577883 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -313,7 +313,6 @@ set(SRC_SHADERS refresh/vkpt/shader/asvgf_gradient_img.comp refresh/vkpt/shader/asvgf_gradient_reproject.comp refresh/vkpt/shader/asvgf_lf.comp - refresh/vkpt/shader/asvgf_taa.comp refresh/vkpt/shader/asvgf_taau.comp refresh/vkpt/shader/asvgf_temporal.comp refresh/vkpt/shader/instance_geometry.comp diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index fed1526f4..470ab058d 100644 --- a/src/refresh/vkpt/asvgf.c +++ b/src/refresh/vkpt/asvgf.c @@ -29,7 +29,6 @@ enum { ATROUS_ITER_1, ATROUS_ITER_2, ATROUS_ITER_3, - TAA, TAAU, CHECKERBOARD_INTERLEAVE, COMPOSITING, @@ -41,8 +40,6 @@ static VkPipelineLayout pipeline_layout_atrous; static VkPipelineLayout pipeline_layout_general; static VkPipelineLayout pipeline_layout_taa; -extern cvar_t* cvar_flt_taa; - VkResult vkpt_asvgf_initialize() { @@ -161,11 +158,6 @@ vkpt_asvgf_create_pipelines() .stage = SHADER_STAGE(QVK_MOD_CHECKERBOARD_INTERLEAVE_COMP, VK_SHADER_STAGE_COMPUTE_BIT), .layout = pipeline_layout_general, }, - [TAA] = { 
- .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, - .stage = SHADER_STAGE(QVK_MOD_ASVGF_TAA_COMP, VK_SHADER_STAGE_COMPUTE_BIT), - .layout = pipeline_layout_general, - }, [TAAU] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE(QVK_MOD_ASVGF_TAAU_COMP, VK_SHADER_STAGE_COMPUTE_BIT), @@ -478,27 +470,18 @@ VkResult vkpt_taa(VkCommandBuffer cmd_buf) BEGIN_PERF_MARKER(cmd_buf, PROFILER_ASVGF_TAA); - if (cvar_flt_taa->integer == 2) - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TAAU]); - else - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TAA]); - + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TAAU]); + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_taa, 0, LENGTH(desc_sets), desc_sets, 0, 0); - VkExtent2D dispatch_size; - if (cvar_flt_taa->integer == 2) - dispatch_size = qvk.extent_unscaled; - else - { - dispatch_size = qvk.extent_render; + VkExtent2D dispatch_size = qvk.extent_taa_output; - if (dispatch_size.width < qvk.extent_screen_images.width) - dispatch_size.width += 8; + if (dispatch_size.width < qvk.extent_taa_images.width) + dispatch_size.width += 8; - if (dispatch_size.height < qvk.extent_screen_images.height) - dispatch_size.height += 8; - } + if (dispatch_size.height < qvk.extent_taa_images.height) + dispatch_size.height += 8; vkCmdDispatch(cmd_buf, (dispatch_size.width + 15) / 16, diff --git a/src/refresh/vkpt/bloom.c b/src/refresh/vkpt/bloom.c index 2f9bf87f6..7872ca2af 100644 --- a/src/refresh/vkpt/bloom.c +++ b/src/refresh/vkpt/bloom.c @@ -48,7 +48,6 @@ cvar_t *cvar_bloom_sigma = NULL; cvar_t *cvar_bloom_intensity = NULL; cvar_t *cvar_bloom_sigma_water = NULL; cvar_t *cvar_bloom_intensity_water = NULL; -extern cvar_t* cvar_flt_taa; static float bloom_intensity; static float bloom_sigma; @@ -56,7 +55,7 @@ static float under_water_animation; static void compute_push_constants() { - float 
sigma_pixels = bloom_sigma * (float)((cvar_flt_taa->integer == 2) ? qvk.extent_unscaled.height : qvk.extent_render.height); + float sigma_pixels = bloom_sigma * (float)(qvk.extent_taa_output.height); float effective_sigma = sigma_pixels * 0.25f; effective_sigma = min(effective_sigma, 100.f); @@ -301,8 +300,7 @@ vkpt_bloom_destroy_pipelines() VkResult vkpt_bloom_record_cmd_buffer(VkCommandBuffer cmd_buf) { - qboolean taau = cvar_flt_taa->integer == 2; - VkExtent2D extent = taau ? qvk.extent_unscaled : qvk.extent_render; + VkExtent2D extent = qvk.extent_taa_output; compute_push_constants(); diff --git a/src/refresh/vkpt/draw.c b/src/refresh/vkpt/draw.c index aabb5342f..0d36f829c 100644 --- a/src/refresh/vkpt/draw.c +++ b/src/refresh/vkpt/draw.c @@ -58,8 +58,6 @@ static VkDescriptorSetLayout desc_set_layout_sbo; static VkDescriptorPool desc_pool_sbo; static VkDescriptorSet desc_set_sbo[MAX_FRAMES_IN_FLIGHT]; -extern cvar_t* cvar_flt_taa; - VkExtent2D vkpt_draw_get_extent() { @@ -564,11 +562,9 @@ vkpt_final_blit_simple(VkCommandBuffer cmd_buf) .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ); - qboolean taau = cvar_flt_taa->integer == 2; - VkOffset3D blit_size = { - .x = taau ? qvk.extent_unscaled.width : qvk.extent_render.width, - .y = taau ? 
qvk.extent_unscaled.height : qvk.extent_render.height, + .x = qvk.extent_taa_output.width, + .y = qvk.extent_taa_output.height, .z = 1 }; VkOffset3D blit_size_unscaled = { diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index d2629e94a..2ac9da840 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -231,8 +231,8 @@ vkpt_initialize_all(VkptInitFlags_t init_flags) qvk.extent_render = get_render_extent(); qvk.extent_screen_images = get_screen_image_extent(); - qvk.extent_taa.width = max(qvk.extent_screen_images.width, qvk.extent_unscaled.width); - qvk.extent_taa.height = max(qvk.extent_screen_images.height, qvk.extent_unscaled.height); + qvk.extent_taa_images.width = max(qvk.extent_screen_images.width, qvk.extent_unscaled.width); + qvk.extent_taa_images.height = max(qvk.extent_screen_images.height, qvk.extent_unscaled.height); qvk.gpu_slice_width = (qvk.extent_render.width + qvk.device_count - 1) / qvk.device_count; @@ -2013,6 +2013,33 @@ evaluate_reference_mode(reference_mode_t* ref_mode) ref_mode->reflect_refract = min(10, ref_mode->reflect_refract); } +static void +evaluate_taa_settings(const reference_mode_t* ref_mode) +{ + qvk.effective_aa_mode = AA_MODE_OFF; + qvk.extent_taa_output = qvk.extent_render; + + if (!ref_mode->enable_denoiser) + return; + + if (cvar_flt_taa->integer == AA_MODE_TAA) + { + qvk.effective_aa_mode = AA_MODE_TAA; + } + else if (cvar_flt_taa->integer == AA_MODE_UPSCALE) + { + if (qvk.extent_render.width > qvk.extent_unscaled.width || qvk.extent_render.height > qvk.extent_unscaled.height) + { + qvk.effective_aa_mode = AA_MODE_TAA; + } + else + { + qvk.effective_aa_mode = AA_MODE_UPSCALE; + qvk.extent_taa_output = qvk.extent_unscaled; + } + } +} + static void prepare_sky_matrix(float time, vec3_t sky_matrix[3]) { @@ -2068,6 +2095,8 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c memcpy(ubo->P_prev, ubo->P, sizeof(float) * 16); memcpy(ubo->invP_prev, ubo->invP, sizeof(float) * 
16); ubo->cylindrical_hfov_prev = ubo->cylindrical_hfov; + ubo->prev_taa_output_width = ubo->taa_output_width; + ubo->prev_taa_output_height = ubo->taa_output_height; { float raw_proj[16]; @@ -2112,12 +2141,10 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->inv_height = 1.0f / (float)qvk.extent_render.height; ubo->unscaled_width = qvk.extent_unscaled.width; ubo->unscaled_height = qvk.extent_unscaled.height; - ubo->inv_unscaled_width = 1.0f / ubo->unscaled_width; - ubo->inv_unscaled_height = 1.0f / ubo->unscaled_height; - ubo->taa_width = qvk.extent_taa.width; - ubo->taa_height = qvk.extent_taa.height; - ubo->inv_taa_width = 1.0f / ubo->taa_width; - ubo->inv_taa_height = 1.0f / ubo->taa_height; + ubo->taa_image_width = qvk.extent_taa_images.width; + ubo->taa_image_height = qvk.extent_taa_images.height; + ubo->taa_output_width = qvk.extent_taa_output.width; + ubo->taa_output_height = qvk.extent_taa_output.height; ubo->current_gpu_slice_width = qvk.gpu_slice_width; ubo->prev_gpu_slice_width = qvk.gpu_slice_width_prev; ubo->screen_image_width = qvk.extent_screen_images.width; @@ -2166,7 +2193,7 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->pt_ndf_trim = 1.f; } } - else if(cvar_flt_taa->integer == 2) + else if(qvk.effective_aa_mode == AA_MODE_UPSCALE) { // adjust texture LOD bias to the resolution scale, i.e. use negative bias if scale is < 100 float resolution_scale = (drs_effective_scale != 0) ? 
(float)drs_effective_scale : (float)scr_viewsize->integer; @@ -2206,8 +2233,7 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->temporal_blend_factor = ref_mode->temporal_blend_factor; ubo->flt_enable = ref_mode->enable_denoiser; - if (!ref_mode->enable_denoiser) - ubo->flt_taa = 0; + ubo->flt_taa = qvk.effective_aa_mode; ubo->pt_num_bounce_rays = ref_mode->num_bounce_rays; ubo->pt_reflect_refract = ref_mode->reflect_refract; @@ -2228,22 +2254,16 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->flt_taa = 0; } - if (ubo->flt_taa) + if (qvk.effective_aa_mode == AA_MODE_UPSCALE) { int taa_index = (int)(qvk.frame_counter % NUM_TAA_SAMPLES); ubo->sub_pixel_jitter[0] = taa_samples[taa_index][0]; ubo->sub_pixel_jitter[1] = taa_samples[taa_index][1]; - - taa_index = (int)((qvk.frame_counter - 1) % NUM_TAA_SAMPLES); - ubo->prev_sub_pixel_jitter[0] = taa_samples[taa_index][0]; - ubo->prev_sub_pixel_jitter[1] = taa_samples[taa_index][1]; } else { ubo->sub_pixel_jitter[0] = 0.f; ubo->sub_pixel_jitter[1] = 0.f; - ubo->prev_sub_pixel_jitter[0] = 0.f; - ubo->prev_sub_pixel_jitter[1] = 0.f; } ubo->first_person_model = cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON; @@ -2331,6 +2351,7 @@ R_RenderFrame_RTX(refdef_t *fd) reference_mode_t ref_mode; evaluate_reference_mode(&ref_mode); + evaluate_taa_settings(&ref_mode); qboolean menu_mode = cl_paused->integer == 1 && uis.menuDepth > 0 && render_world; @@ -2734,6 +2755,8 @@ R_BeginFrame_RTX(void) recreate_swapchain(); } + + retry:; #ifdef VKPT_DEVICE_GROUPS VkAcquireNextImageInfoKHR acquire_info = { @@ -2800,7 +2823,7 @@ R_EndFrame_RTX(void) if (frame_ready) { - if (cvar_flt_taa->integer == 2) + if (qvk.effective_aa_mode == AA_MODE_UPSCALE) { vkpt_final_blit_simple(cmd_buf); } @@ -3009,7 +3032,6 @@ R_Init_RTX(qboolean total) cvar_flt_temporal_hf->changed = temporal_cvar_changed; cvar_flt_temporal_lf->changed = temporal_cvar_changed; 
cvar_flt_temporal_spec->changed = temporal_cvar_changed; - cvar_flt_taa->changed = temporal_cvar_changed; cvar_flt_enable->changed = temporal_cvar_changed; cvar_pt_dof->changed = accumulation_cvar_changed; diff --git a/src/refresh/vkpt/shader/asvgf_taa.comp b/src/refresh/vkpt/shader/asvgf_taa.comp deleted file mode 100644 index f1296fba5..000000000 --- a/src/refresh/vkpt/shader/asvgf_taa.comp +++ /dev/null @@ -1,214 +0,0 @@ -/* -Copyright (C) 2018 Christoph Schied -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -// ========================================================================== // -// A simple temporal anti-aliasing filter that operates in the PQ (Perceptual -// Quantizer) color space, which improves high-contrast edges - for example, -// between some geometry and the sun. 
-// ========================================================================== // - -#version 460 -#extension GL_GOOGLE_include_directive : enable -#extension GL_EXT_nonuniform_qualifier : enable - -layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; - -#define GLOBAL_UBO_DESC_SET_IDX 0 -#include "global_ubo.h" - -#define GLOBAL_TEXTURES_DESC_SET_IDX 1 -#include "global_textures.h" - -#define VERTEX_BUFFER_DESC_SET_IDX 2 -#define VERTEX_READONLY 1 -#include "vertex_buffer.h" - -#include "utils.glsl" -#include "asvgf.glsl" - -const float pq_m1 = 0.1593017578125; -const float pq_m2 = 78.84375; -const float pq_c1 = 0.8359375; -const float pq_c2 = 18.8515625; -const float pq_c3 = 18.6875; -const float pq_C = 10000.0; - -vec3 PQDecode(vec3 image) -{ - vec3 Np = pow(max(image, 0.0), vec3(1.0 / pq_m2)); - vec3 L = Np - pq_c1; - L = L / (pq_c2 - pq_c3 * Np); - L = pow(max(L, 0.0), vec3(1.0 / pq_m1)); - - return L * pq_C; // returns cd/m^2 -} - -vec3 PQEncode(vec3 image) -{ - vec3 L = image / pq_C; - vec3 Lm = pow(max(L, 0.0), vec3(pq_m1)); - vec3 N = (pq_c1 + pq_c2 * Lm) / (1.0 + pq_c3 * Lm); - image = pow(N, vec3(pq_m2)); - - return clamp(image, vec3(0), vec3(1)); -} - -void get_moments(ivec2 ipos, int r, out vec3 mom1, out vec3 mom2) -{ - mom1 = vec3(0.0); - mom2 = vec3(0.0); - - for(int yy = -r; yy <= r; yy++) { - for(int xx = -r; xx <= r; xx++) { - if(xx == 0 && yy == 0) - continue; - - ivec2 p = ipos + ivec2(xx, yy); - vec3 c = texelFetch(TEX_FLAT_COLOR, p, 0).rgb; - c = PQEncode(c); - - mom1 += c.rgb; - mom2 += c.rgb * c.rgb; - } - } -} - -void -main() -{ - ivec2 ipos = ivec2(gl_GlobalInvocationID); - - if (ipos.x >= global_ubo.width || ipos.y >= global_ubo.height) - { - imageStore(IMG_TAA_OUTPUT, ipos, vec4(0)); - return; - } - - vec4 linear_color_center = texelFetch(TEX_FLAT_COLOR, ipos, 0); - vec3 color_center = PQEncode(linear_color_center.rgb); - int checkerboard_flags = int(linear_color_center.a); - - vec3 color_output = color_center; - vec3 
linear_color_output = linear_color_center.rgb; - - if(global_ubo.flt_taa != 0) - { - // Regular TAA mode - - vec3 mom1; - vec3 mom2; - - int num_pix; - - // Obtain the color moments for the surrounding pixels. - get_moments(ipos, 1, mom1, mom2); - num_pix = 9; - - // Remove or reduce sparkles by clamping the color of the center pixel to its surroundings - if(global_ubo.flt_taa_anti_sparkle > 0) - { - // Custom curve to make perceived blurriness depend on the cvar in a roughly linear way - float scale = pow(min(1.0, global_ubo.flt_taa_anti_sparkle), -0.25); - - color_center = min(color_center, scale * mom1 / (num_pix - 1)); - } - - mom1 += color_center; - mom2 += color_center * color_center; - - mom1 /= float(num_pix); - mom2 /= float(num_pix); - - // Find the longest motion vector in a 3x3 window - vec2 motion; - { - float len = -1; - const int r = 1; - for(int yy = -r; yy <= r; yy++) { - for(int xx = -r; xx <= r; xx++) { - ivec2 p = ipos + ivec2(xx, yy); - vec2 m = texelFetch(TEX_FLAT_MOTION, p, 0).xy; - float l = dot(m, m); - if(l > len) { - len = l; - motion = m; - } - - } - } - } - - vec2 pos_prev = ((vec2(ipos) + vec2(0.5)) * vec2(global_ubo.inv_width, global_ubo.inv_height) + motion.xy) * vec2(global_ubo.prev_width, global_ubo.prev_height); - - if(all(greaterThanEqual(ivec2(pos_prev), ivec2(1))) - && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.width, global_ubo.height) - 1))) - { - // Motion vector was valid - sample the previous frame - vec3 color_prev = sample_texture_catmull_rom(TEX_ASVGF_TAA_B, pos_prev).rgb; - - if(!any(isnan(color_prev))) - { - // If enabled, apply neighbourhood color clamping (NCC) - if(global_ubo.flt_taa_variance > 0) - { - float variance_scale = 0.7;// global_ubo.flt_taa_variance; // TAAU switch hack - - if(checkerboard_flags == (CHECKERBOARD_FLAG_REFLECTION | CHECKERBOARD_FLAG_REFRACTION)) - variance_scale *= 2; - - vec3 sigma = sqrt(max(vec3(0), mom2 - mom1 * mom1)); - vec3 mi = mom1 - sigma * variance_scale; - vec3 ma = mom1 + 
sigma * variance_scale; - - color_prev = clamp(color_prev, mi, ma); - } - - // Mix the new color with the clamped previous color - color_output = mix(color_center, color_prev, clamp(global_ubo.flt_taa_history_weight, 0, 0.999)); - } - } - - linear_color_output = PQDecode(color_output); - } - else if(global_ubo.temporal_blend_factor > 0) - { - // Temporal accumulation in reference path tracing mode. - // The frame is supposed to be static (paused), so no motion vectors or high quality sampling. - // The accumulator is an RGBA32_FLOAT texture for higher accuracy. - - if(global_ubo.temporal_blend_factor < 1) - { - vec3 prev_color = imageLoad(IMG_HQ_COLOR_INTERLEAVED, ipos).rgb; - linear_color_output = mix(prev_color, linear_color_output, global_ubo.temporal_blend_factor); - } - - imageStore(IMG_HQ_COLOR_INTERLEAVED, ipos, vec4(linear_color_output, 0)); - - color_output = PQEncode(linear_color_output); - } - - bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.width / 2, global_ubo.height / 2))); - if(is_readback_pixel) - { - readback.hdr_color = linear_color_output; - } - - imageStore(IMG_ASVGF_TAA_A, ipos, vec4(color_output, 0)); - imageStore(IMG_TAA_OUTPUT, ipos, vec4(linear_color_output, 1)); -} diff --git a/src/refresh/vkpt/shader/asvgf_taau.comp b/src/refresh/vkpt/shader/asvgf_taau.comp index bc7cbd029..ab9ed462b 100644 --- a/src/refresh/vkpt/shader/asvgf_taau.comp +++ b/src/refresh/vkpt/shader/asvgf_taau.comp @@ -149,9 +149,10 @@ float get_sample_weight(vec2 delta, float scale) vec2 hires_to_lores(ivec2 ipos) { - vec2 scale = vec2(global_ubo.width * global_ubo.inv_unscaled_width, global_ubo.height * global_ubo.inv_unscaled_height); + vec2 input_size = vec2(global_ubo.width, global_ubo.height); + vec2 output_size = vec2(global_ubo.taa_output_width, global_ubo.taa_output_height); - return (vec2(ipos) + vec2(0.5)) * scale - vec2(0.5) - global_ubo.sub_pixel_jitter; + return (vec2(ipos) + vec2(0.5)) * (input_size / output_size) - vec2(0.5) - 
global_ubo.sub_pixel_jitter; } void @@ -167,7 +168,7 @@ main() preload(group_base_lores, group_bottomright_lores - group_base_lores + ivec2(1)); barrier(); - if (ipos.x >= global_ubo.unscaled_width || ipos.y >= global_ubo.unscaled_height) + if (ipos.x >= global_ubo.taa_output_width || ipos.y >= global_ubo.taa_output_height) { imageStore(IMG_TAA_OUTPUT, ipos, vec4(0)); return; @@ -185,9 +186,9 @@ main() vec3 color_output = color_center; vec3 linear_color_output; - if(global_ubo.flt_taa != 0) + if(global_ubo.flt_taa != AA_MODE_OFF) { - // Regular TAA mode + // Regular TAA/TAAU mode vec3 mom1; vec3 mom2; @@ -232,12 +233,15 @@ main() } } - motion *= vec2(global_ubo.unscaled_width, global_ubo.unscaled_height); - - vec2 pos_prev = vec2(ipos) + vec2(0.5) + motion.xy; + // Calculate the previous position, taking into account that the previous frame output can have different size from the current frame + vec2 pos_prev = ((vec2(ipos) + vec2(0.5)) / vec2(global_ubo.taa_output_width, global_ubo.taa_output_height) + motion.xy) + * vec2(global_ubo.prev_taa_output_width, global_ubo.prev_taa_output_height); + + // Scale the motion for the weight calculation below + motion *= vec2(global_ubo.taa_output_width, global_ubo.taa_output_height); if(all(greaterThanEqual(ivec2(pos_prev), ivec2(1))) - && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - 1))) + && all(lessThan(ivec2(pos_prev), ivec2(global_ubo.taa_output_width, global_ubo.taa_output_height) - 1))) { // Motion vector was valid - sample the previous frame vec3 color_prev = sample_texture_catmull_rom(TEX_ASVGF_TAA_B, pos_prev).rgb; @@ -261,7 +265,7 @@ main() // Mix the new color with the clamped previous color float motion_weight = smoothstep(0, 1.0f, sqrt(dot(motion, motion))); - float sample_weight = get_sample_weight(nearest_render_pos - int_render_pos, global_ubo.unscaled_width * global_ubo.inv_width); + float sample_weight = get_sample_weight(nearest_render_pos - int_render_pos, 
global_ubo.taa_output_width * global_ubo.inv_width); float pixel_weight = max(motion_weight, sample_weight) * 0.1f; pixel_weight = clamp(pixel_weight, 0, 1); color_output = mix(color_prev, color_center, pixel_weight); @@ -288,6 +292,10 @@ main() color_output = PQEncode(linear_color_output); } + else + { + linear_color_output = PQDecode(color_output); + } bool is_readback_pixel = all(equal(ipos, ivec2(global_ubo.unscaled_width / 2, global_ubo.unscaled_height / 2))); if(is_readback_pixel) diff --git a/src/refresh/vkpt/shader/bloom_blur.comp b/src/refresh/vkpt/shader/bloom_blur.comp index 816112e74..30a11bb04 100644 --- a/src/refresh/vkpt/shader/bloom_blur.comp +++ b/src/refresh/vkpt/shader/bloom_blur.comp @@ -39,7 +39,7 @@ layout (push_constant) uniform push_constant_block { vec2 img_to_uv(vec2 ipos) { - vec2 uv = ipos / vec2(global_ubo.taa_width / 4, global_ubo.taa_height / 4); + vec2 uv = ipos / vec2(global_ubo.taa_image_width / 4, global_ubo.taa_image_height / 4); return uv; } @@ -72,7 +72,7 @@ void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - ivec2 bloom_extent = ivec2(global_ubo.taa_width / 4, global_ubo.taa_height / 4); + ivec2 bloom_extent = ivec2(global_ubo.taa_output_width / 4, global_ubo.taa_output_height / 4); if(any(greaterThanEqual(ipos, bloom_extent))) { diff --git a/src/refresh/vkpt/shader/bloom_composite.comp b/src/refresh/vkpt/shader/bloom_composite.comp index efebfa453..026c870ea 100644 --- a/src/refresh/vkpt/shader/bloom_composite.comp +++ b/src/refresh/vkpt/shader/bloom_composite.comp @@ -30,7 +30,7 @@ layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in; vec2 img_to_uv(ivec2 ipos) { - vec2 uv = (vec2(ipos) + vec2(0.5, 0.5)) / vec2(global_ubo.taa_width, global_ubo.taa_height); + vec2 uv = (vec2(ipos) + vec2(0.5, 0.5)) / vec2(global_ubo.taa_image_width, global_ubo.taa_image_height); uv = clamp(uv, 0, 1); return uv; } @@ -39,7 +39,7 @@ vec2 img_to_uv(ivec2 ipos) void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - 
if(any(greaterThanEqual(ipos, ivec2(global_ubo.taa_width, global_ubo.taa_height)))) + if(any(greaterThanEqual(ipos, ivec2(global_ubo.taa_output_width, global_ubo.taa_output_height)))) return; vec2 uv = img_to_uv(ipos); diff --git a/src/refresh/vkpt/shader/constants.h b/src/refresh/vkpt/shader/constants.h index b527c2d84..ea1026d13 100644 --- a/src/refresh/vkpt/shader/constants.h +++ b/src/refresh/vkpt/shader/constants.h @@ -22,9 +22,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #define GRAD_DWN (3) #define SHADOWMAP_SIZE 4096 -#define TERRAIN_SHADOWMAP_SIZE 2048 - -#define USE_NEAREST_TEXTURE_FILTER 0 #define HISTOGRAM_BINS 128 @@ -47,6 +44,10 @@ with this program; if not, write to the Free Software Foundation, Inc., #define MAX_CAMERAS 8 +#define AA_MODE_OFF 0 +#define AA_MODE_TAA 1 +#define AA_MODE_UPSCALE 2 + // Scaling factors for lighting components when they are stored in textures. // FP16 and RGBE textures have very limited range, and these factors help bring the signal within that range. #define STORAGE_SCALE_LF 1024 diff --git a/src/refresh/vkpt/shader/global_textures.h b/src/refresh/vkpt/shader/global_textures.h index 1a0b8f0c1..234d28a38 100644 --- a/src/refresh/vkpt/shader/global_textures.h +++ b/src/refresh/vkpt/shader/global_textures.h @@ -32,6 +32,9 @@ with this program; if not, write to the Free Software Foundation, Inc., #define IMG_HEIGHT_GRAD ((qvk.extent_screen_images.height + GRAD_DWN - 1) / GRAD_DWN) #define IMG_WIDTH_GRAD_MGPU ((qvk.extent_screen_images.width + GRAD_DWN - 1) / GRAD_DWN / qvk.device_count) +#define IMG_WIDTH_TAA (qvk.extent_taa_images.width) +#define IMG_HEIGHT_TAA (qvk.extent_taa_images.height) + /* These are images that are to be used as render targets and buffers, but not textures. 
*/ #define LIST_IMAGES \ IMG_DO(PT_ALBEDO, 0, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ @@ -57,10 +60,10 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(FLAT_COLOR, 20, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(FLAT_MOTION, 21, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(PT_GODRAYS_THROUGHPUT_DIST,22, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(BLOOM_DOWNSCALE_MIP_1, 23, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 2, qvk.extent_taa.height / 2 ) \ - IMG_DO(BLOOM_HBLUR, 24, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ - IMG_DO(BLOOM_VBLUR, 25, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width / 4, qvk.extent_taa.height / 4 ) \ - IMG_DO(TAA_OUTPUT, 26, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(BLOOM_DOWNSCALE_MIP_1, 23, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA / 2, IMG_HEIGHT_TAA / 2 ) \ + IMG_DO(BLOOM_HBLUR, 24, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA / 4, IMG_HEIGHT_TAA / 4 ) \ + IMG_DO(BLOOM_VBLUR, 25, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA / 4, IMG_HEIGHT_TAA / 4 ) \ + IMG_DO(TAA_OUTPUT, 26, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ IMG_DO(PT_VIEW_DIRECTION, 27, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_VIEW_DIRECTION2, 28, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(PT_THROUGHPUT, 29, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ @@ -89,8 +92,8 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(ASVGF_FILTERED_SPEC_B, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, 
rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_TAA_A, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ + IMG_DO(ASVGF_TAA_B, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ IMG_DO(ASVGF_RNG_SEED_A, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_B, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ @@ -117,8 +120,8 @@ with this program; if not, write to the Free Software Foundation, Inc., IMG_DO(ASVGF_FILTERED_SPEC_A, 50, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_B, 51, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_MOMENTS_HF_A, 52, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ - IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, qvk.extent_taa.width, qvk.extent_taa.height ) \ + IMG_DO(ASVGF_TAA_B, 53, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ + IMG_DO(ASVGF_TAA_A, 54, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ IMG_DO(ASVGF_RNG_SEED_B, 55, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_RNG_SEED_A, 56, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, 57, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index e229c3d2b..e2cfaa341 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -58,7 +58,7 @@ with this program; if not, write to the Free Software Foundation, Inc., UBO_CVAR_DO(flt_scale_overlay, 1.0) /* scale for transparent and emissive objects visible with primary 
rays */ \ UBO_CVAR_DO(flt_scale_spec, 1) \ UBO_CVAR_DO(flt_show_gradients, 0) /* switch for showing the gradient values as overlay image, 0 or 1 */ \ - UBO_CVAR_DO(flt_taa, 1) /* switch for temporal AA, 0 or 1 */ \ + UBO_CVAR_DO(flt_taa, AA_MODE_UPSCALE) /* temporal anti-aliasing mode: 0 = off, 1 = regular TAA, 2 = temporal upscale */ \ UBO_CVAR_DO(flt_taa_anti_sparkle, 0.25) /* strength of the anti-sparkle filter of TAA, [0..1] */ \ UBO_CVAR_DO(flt_taa_variance, 1.0) /* temporal AA variance window scale, 0 means disable NCC, [0..inf) */ \ UBO_CVAR_DO(flt_taa_history_weight, 0.95) /* temporal AA weight of the history sample, [0..1) */ \ @@ -176,23 +176,19 @@ with this program; if not, write to the Free Software Foundation, Inc., GLOBAL_UBO_VAR_LIST_DO(float, inv_width) \ GLOBAL_UBO_VAR_LIST_DO(float, inv_height) \ \ - GLOBAL_UBO_VAR_LIST_DO(float, unscaled_width) \ - GLOBAL_UBO_VAR_LIST_DO(float, unscaled_height) \ - GLOBAL_UBO_VAR_LIST_DO(float, inv_unscaled_width) \ - GLOBAL_UBO_VAR_LIST_DO(float, inv_unscaled_height) \ + GLOBAL_UBO_VAR_LIST_DO(int, unscaled_width) \ + GLOBAL_UBO_VAR_LIST_DO(int, unscaled_height) \ + GLOBAL_UBO_VAR_LIST_DO(int, taa_image_width) \ + GLOBAL_UBO_VAR_LIST_DO(int, taa_image_height) \ + \ + GLOBAL_UBO_VAR_LIST_DO(int, taa_output_width) \ + GLOBAL_UBO_VAR_LIST_DO(int, taa_output_height) \ + GLOBAL_UBO_VAR_LIST_DO(int, prev_taa_output_width) \ + GLOBAL_UBO_VAR_LIST_DO(int, prev_taa_output_height) \ \ GLOBAL_UBO_VAR_LIST_DO(vec2, sub_pixel_jitter) \ - GLOBAL_UBO_VAR_LIST_DO(vec2, prev_sub_pixel_jitter) \ - \ - GLOBAL_UBO_VAR_LIST_DO(float, prev_adapted_luminance) \ + GLOBAL_UBO_VAR_LIST_DO(float, prev_adapted_luminance) \ GLOBAL_UBO_VAR_LIST_DO(float, padding1) \ - GLOBAL_UBO_VAR_LIST_DO(float, padding2) \ - GLOBAL_UBO_VAR_LIST_DO(float, padding3) \ - \ - GLOBAL_UBO_VAR_LIST_DO(float, taa_width) \ - GLOBAL_UBO_VAR_LIST_DO(float, taa_height) \ - GLOBAL_UBO_VAR_LIST_DO(float, inv_taa_width) \ - GLOBAL_UBO_VAR_LIST_DO(float, inv_taa_height) 
\ \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_center) \ GLOBAL_UBO_VAR_LIST_DO(vec4, world_size) \ diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index 7e3c667a0..6dac7ca14 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -123,7 +123,7 @@ main() vec3 direction; vec2 pixel_offset; - if(global_ubo.flt_taa == 1 || global_ubo.temporal_blend_factor > 0) + if(global_ubo.flt_taa == AA_MODE_TAA || global_ubo.temporal_blend_factor > 0) { // Photo mode or legacy TAA - use higher quality sampling pixel_offset = vec2(get_rng(RNG_PRIMARY_OFF_X), get_rng(RNG_PRIMARY_OFF_Y)); diff --git a/src/refresh/vkpt/shader/tone_mapping_apply.comp b/src/refresh/vkpt/shader/tone_mapping_apply.comp index 7c86fcbf8..0ae5ee9f7 100644 --- a/src/refresh/vkpt/shader/tone_mapping_apply.comp +++ b/src/refresh/vkpt/shader/tone_mapping_apply.comp @@ -121,9 +121,7 @@ vec3 srgb_dither(vec3 color, ivec2 ipos) void main() { ivec2 ipos = ivec2(gl_GlobalInvocationID); - const ivec2 screenSize = (global_ubo.flt_taa == 2) - ? ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - : ivec2(global_ubo.taa_width, global_ubo.taa_width); + const ivec2 screenSize = ivec2(global_ubo.taa_output_width, global_ubo.taa_output_height); if(any(greaterThanEqual(ipos, screenSize))) return; diff --git a/src/refresh/vkpt/shader/tone_mapping_histogram.comp b/src/refresh/vkpt/shader/tone_mapping_histogram.comp index 38546de25..f3508dd29 100644 --- a/src/refresh/vkpt/shader/tone_mapping_histogram.comp +++ b/src/refresh/vkpt/shader/tone_mapping_histogram.comp @@ -172,9 +172,7 @@ shared uint s_Histogram[HISTOGRAM_BINS]; void main() { const ivec2 ipos = ivec2(gl_GlobalInvocationID); - const ivec2 screenSize = (global_ubo.flt_taa == 2) - ? 
ivec2(global_ubo.unscaled_width, global_ubo.unscaled_height) - : ivec2(global_ubo.width, global_ubo.width); + const ivec2 screenSize = ivec2(global_ubo.taa_output_width, global_ubo.taa_output_height); if(any(greaterThanEqual(ipos, screenSize))) return; diff --git a/src/refresh/vkpt/tone_mapping.c b/src/refresh/vkpt/tone_mapping.c index 001baf57f..59d6322bb 100644 --- a/src/refresh/vkpt/tone_mapping.c +++ b/src/refresh/vkpt/tone_mapping.c @@ -70,8 +70,6 @@ static VkPipelineLayout pipeline_layout_tone_mapping_curve; static VkPipelineLayout pipeline_layout_tone_mapping_apply; static int reset_required = 1; // If 1, recomputes tone curve based only on this frame -extern cvar_t* cvar_flt_taa; - // Creates our pipeline layouts. VkResult vkpt_tone_mapping_initialize() @@ -261,8 +259,8 @@ vkpt_tone_mapping_record_cmd_buffer(VkCommandBuffer cmd_buf, float frame_time) pipeline_layout_tone_mapping_histogram, 0, LENGTH(desc_sets), desc_sets, 0, 0); vkCmdDispatch(cmd_buf, - (qvk.extent_taa.width + 15) / 16, - (qvk.extent_taa.height + 15) / 16, + (qvk.extent_taa_output.width + 15) / 16, + (qvk.extent_taa_output.height + 15) / 16, 1); BUFFER_BARRIER(cmd_buf, @@ -385,11 +383,9 @@ vkpt_tone_mapping_record_cmd_buffer(VkCommandBuffer cmd_buf, float frame_time) vkCmdPushConstants(cmd_buf, pipeline_layout_tone_mapping_apply, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants_tm2_apply), push_constants_tm2_apply); - qboolean taau = cvar_flt_taa->integer == 2; - vkCmdDispatch(cmd_buf, - ((taau ? qvk.extent_unscaled.width : qvk.extent_render.width) + 15) / 16, - ((taau ? 
qvk.extent_unscaled.height : qvk.extent_render.height) + 15) / 16, + (qvk.extent_taa_output.width + 15) / 16, + (qvk.extent_taa_output.height + 15) / 16, 1); // Because VKPT_IMG_TAA_OUTPUT changed, we make sure to wait for the image diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 2f1285675..f3d553c0a 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -91,7 +91,6 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_ASVGF_ATROUS_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_LF_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_TEMPORAL_COMP) \ - SHADER_MODULE_DO(QVK_MOD_ASVGF_TAA_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_TAAU_COMP) \ SHADER_MODULE_DO(QVK_MOD_BLOOM_BLUR_COMP) \ SHADER_MODULE_DO(QVK_MOD_BLOOM_COMPOSITE_COMP) \ @@ -184,7 +183,8 @@ typedef struct QVK_s { VkExtent2D extent_render; VkExtent2D extent_render_prev; VkExtent2D extent_unscaled; - VkExtent2D extent_taa; + VkExtent2D extent_taa_images; + VkExtent2D extent_taa_output; uint32_t gpu_slice_width; uint32_t gpu_slice_width_prev; uint32_t num_swap_chain_images; @@ -212,6 +212,8 @@ typedef struct QVK_s { int win_height; uint64_t frame_counter; + int effective_aa_mode; + SDL_Window *window; uint32_t num_sdl2_extensions; const char **sdl2_extensions; From 5406a1bf6c608154055a7a429aacacfe72eff3e1 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 2 Dec 2020 16:51:20 -0800 Subject: [PATCH 24/48] Added support for the final official KHR ray tracing extensions (v11). Squashed from a feature branch. Both NV_ray_tracing and KHR_ray_tracing_pipeline etc. are supported. When both are available, the KHR extensions are preferred, unless cvar 'nv_ray_tracing' is nonzero. 
Includes work done by Joshua Ashton in https://github.com/NVIDIA/Q2RTX/pull/73 --- cmake/compileShaders.cmake | 32 +- extern/Vulkan-Headers | 2 +- src/CMakeLists.txt | 31 +- src/client/refresh.c | 3 + src/refresh/gl/main.c | 2 - src/refresh/vkpt/asvgf.c | 31 +- src/refresh/vkpt/device_memory_allocator.c | 6 +- src/refresh/vkpt/draw.c | 2 +- src/refresh/vkpt/god_rays.c | 6 +- src/refresh/vkpt/main.c | 262 ++- src/refresh/vkpt/path_tracer.c | 1815 ++++++++++------- src/refresh/vkpt/precomputed_sky.c | 6 +- src/refresh/vkpt/shader/direct_lighting.rgen | 6 +- .../vkpt/shader/indirect_lighting.rgen | 4 +- src/refresh/vkpt/shader/path_tracer.h | 29 + src/refresh/vkpt/shader/path_tracer.rchit | 12 +- src/refresh/vkpt/shader/path_tracer.rmiss | 2 +- .../vkpt/shader/path_tracer_beam.rahit | 12 +- .../vkpt/shader/path_tracer_explosion.rahit | 14 +- .../vkpt/shader/path_tracer_particle.rahit | 10 +- src/refresh/vkpt/shader/path_tracer_rgen.h | 24 +- .../vkpt/shader/path_tracer_shadow.rmiss | 2 +- .../vkpt/shader/path_tracer_sprite.rahit | 10 +- src/refresh/vkpt/shader/primary_rays.rgen | 6 +- src/refresh/vkpt/shader/reflect_refract.rgen | 6 +- src/refresh/vkpt/textures.c | 22 +- src/refresh/vkpt/transparency.c | 597 ++---- src/refresh/vkpt/vertex_buffer.c | 6 +- src/refresh/vkpt/vk_util.c | 54 +- src/refresh/vkpt/vk_util.h | 3 + src/refresh/vkpt/vkpt.h | 132 +- vulkan-1.lib | Bin 48602 -> 52366 bytes 32 files changed, 1648 insertions(+), 1501 deletions(-) diff --git a/cmake/compileShaders.cmake b/cmake/compileShaders.cmake index 25c71803f..3360160e8 100644 --- a/cmake/compileShaders.cmake +++ b/cmake/compileShaders.cmake @@ -29,8 +29,8 @@ message(STATUS "Glslang compiler : ${GLSLANG_COMPILER}") function(compile_shader) set(options "") - set(oneValueArgs SOURCE_FILE OUTPUT_FILE_LIST) - set(multiValueArgs "") + set(oneValueArgs SOURCE_FILE OUTPUT_FILE_NAME OUTPUT_FILE_LIST) + set(multiValueArgs DEFINES) cmake_parse_arguments(params "${options}" "${oneValueArgs}" 
"${multiValueArgs}" ${ARGN}) if (NOT params_SOURCE_FILE) @@ -42,37 +42,23 @@ function(compile_shader) endif() set(src_file "${CMAKE_CURRENT_SOURCE_DIR}/${params_SOURCE_FILE}") - get_filename_component(base_file_name ${src_file} NAME) - get_filename_component(file_extension ${src_file} EXT) - if (file_extension STREQUAL ".comp") - set(DEFINES "-DSHADER_STAGE_COMP") - elseif(file_extension STREQUAL ".rahit") - set(DEFINES "-DSHADER_STAGE_ACHIT") - elseif(file_extension STREQUAL ".rmiss") - set(DEFINES "-DSHADER_STAGE_RMISS") - elseif(file_extension STREQUAL ".rchit") - set(DEFINES "-DSHADER_STAGE_RCHIT") - elseif(file_extension STREQUAL ".rgen") - set(DEFINES "-DSHADER_STAGE_RGEN") - elseif(file_extension STREQUAL ".frag") - set(DEFINES "-DSHADER_STAGE_FRAG") - elseif(file_extension STREQUAL ".vert") - set(DEFINES "-DSHADER_STAGE_VERT") + if (params_OUTPUT_FILE_NAME) + set(output_file_name ${params_OUTPUT_FILE_NAME}) else() - message(FATAL_ERROR "unknown extension in shader source file: ${file_extension}") + get_filename_component(output_file_name ${src_file} NAME) endif() - + set_source_files_properties(${src_file} PROPERTIES VS_TOOL_OVERRIDE "None") set (out_dir "${CMAKE_SOURCE_DIR}/baseq2/shader_vkpt") - set (out_file "${out_dir}/${base_file_name}.spv") + set (out_file "${out_dir}/${output_file_name}.spv") set(glslang_command_line - --target-env vulkan1.1 + --target-env vulkan1.2 -DVKPT_SHADER -V - ${DEFINES} + ${params_DEFINES} "${src_file}" -o "${out_file}") diff --git a/extern/Vulkan-Headers b/extern/Vulkan-Headers index fb7f9c9bc..87aaa16d4 160000 --- a/extern/Vulkan-Headers +++ b/extern/Vulkan-Headers @@ -1 +1 @@ -Subproject commit fb7f9c9bcd1d1544ea203a1f3d4253d0e90c5a90 +Subproject commit 87aaa16d4c8e1ac70f8f04acdcd46eed4bd77209 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 684577883..1415f428e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -297,17 +297,7 @@ set(SRC_SHADERS refresh/vkpt/shader/bloom_composite.comp 
refresh/vkpt/shader/bloom_blur.comp refresh/vkpt/shader/compositing.comp - refresh/vkpt/shader/primary_rays.rgen - refresh/vkpt/shader/direct_lighting.rgen refresh/vkpt/shader/checkerboard_interleave.comp - refresh/vkpt/shader/indirect_lighting.rgen - refresh/vkpt/shader/path_tracer.rchit - refresh/vkpt/shader/path_tracer.rmiss - refresh/vkpt/shader/path_tracer_particle.rahit - refresh/vkpt/shader/path_tracer_sprite.rahit - refresh/vkpt/shader/path_tracer_beam.rahit - refresh/vkpt/shader/path_tracer_explosion.rahit - refresh/vkpt/shader/path_tracer_shadow.rmiss refresh/vkpt/shader/asvgf_atrous.comp refresh/vkpt/shader/asvgf_gradient_atrous.comp refresh/vkpt/shader/asvgf_gradient_img.comp @@ -321,7 +311,6 @@ set(SRC_SHADERS refresh/vkpt/shader/tone_mapping_apply.comp refresh/vkpt/shader/physical_sky.comp refresh/vkpt/shader/physical_sky_space.comp - refresh/vkpt/shader/reflect_refract.rgen refresh/vkpt/shader/shadow_map.vert refresh/vkpt/shader/sky_buffer_resolve.comp refresh/vkpt/shader/stretch_pic.frag @@ -330,9 +319,29 @@ set(SRC_SHADERS refresh/vkpt/shader/final_blit.vert ) +set(SRC_RT_SHADERS + refresh/vkpt/shader/primary_rays.rgen + refresh/vkpt/shader/direct_lighting.rgen + refresh/vkpt/shader/indirect_lighting.rgen + refresh/vkpt/shader/path_tracer.rchit + refresh/vkpt/shader/path_tracer.rmiss + refresh/vkpt/shader/path_tracer_particle.rahit + refresh/vkpt/shader/path_tracer_sprite.rahit + refresh/vkpt/shader/path_tracer_beam.rahit + refresh/vkpt/shader/path_tracer_explosion.rahit + refresh/vkpt/shader/path_tracer_shadow.rmiss + refresh/vkpt/shader/reflect_refract.rgen +) + include(../cmake/compileShaders.cmake) if (GLSLANG_COMPILER) + foreach(s ${SRC_RT_SHADERS}) + get_filename_component(shader_name ${s} NAME) + compile_shader(SOURCE_FILE ${s} OUTPUT_FILE_LIST shader_bytecode OUTPUT_FILE_NAME "${shader_name}.nv" DEFINES "-DNV_RAY_TRACING") + compile_shader(SOURCE_FILE ${s} OUTPUT_FILE_LIST shader_bytecode OUTPUT_FILE_NAME "${shader_name}.khr" DEFINES 
"-DKHR_RAY_TRACING") + endforeach() + foreach(s ${SRC_SHADERS}) compile_shader(SOURCE_FILE ${s} OUTPUT_FILE_LIST shader_bytecode) endforeach() diff --git a/src/client/refresh.c b/src/client/refresh.c index 677fb14bf..b64699fa8 100644 --- a/src/client/refresh.c +++ b/src/client/refresh.c @@ -35,6 +35,9 @@ cvar_t *_vid_fullscreen; cvar_t *vid_display; cvar_t *vid_displaylist; +// used in gl and vkpt renderers +int registration_sequence; + #define MODE_GEOMETRY 1 #define MODE_FULLSCREEN 2 #define MODE_MODELIST 4 diff --git a/src/refresh/gl/main.c b/src/refresh/gl/main.c index e90653409..21d0b7a3a 100644 --- a/src/refresh/gl/main.c +++ b/src/refresh/gl/main.c @@ -31,8 +31,6 @@ statCounters_t c; entity_t gl_world; -int registration_sequence; - // regular variables cvar_t *gl_partscale; cvar_t *gl_partstyle; diff --git a/src/refresh/vkpt/asvgf.c b/src/refresh/vkpt/asvgf.c index 470ab058d..164fea857 100644 --- a/src/refresh/vkpt/asvgf.c +++ b/src/refresh/vkpt/asvgf.c @@ -136,22 +136,22 @@ vkpt_asvgf_create_pipelines() [ATROUS_ITER_0] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE_SPEC(QVK_MOD_ASVGF_ATROUS_COMP, VK_SHADER_STAGE_COMPUTE_BIT, &specInfo[0]), - .layout = pipeline_layout_atrous, + .layout = pipeline_layout_general, }, [ATROUS_ITER_1] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE_SPEC(QVK_MOD_ASVGF_ATROUS_COMP, VK_SHADER_STAGE_COMPUTE_BIT, &specInfo[1]), - .layout = pipeline_layout_atrous, + .layout = pipeline_layout_general, }, [ATROUS_ITER_2] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE_SPEC(QVK_MOD_ASVGF_ATROUS_COMP, VK_SHADER_STAGE_COMPUTE_BIT, &specInfo[2]), - .layout = pipeline_layout_atrous, + .layout = pipeline_layout_general, }, [ATROUS_ITER_3] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, .stage = SHADER_STAGE_SPEC(QVK_MOD_ASVGF_ATROUS_COMP, VK_SHADER_STAGE_COMPUTE_BIT, &specInfo[3]), - .layout = pipeline_layout_atrous, + 
.layout = pipeline_layout_general, }, [CHECKERBOARD_INTERLEAVE] = { .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, @@ -247,7 +247,7 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) /* create gradient image */ vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[GRADIENT_IMAGE]); vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_layout_atrous, 0, LENGTH(desc_sets), desc_sets, 0, 0); + pipeline_layout_general, 0, LENGTH(desc_sets), desc_sets, 0, 0); vkCmdDispatch(cmd_buf, (qvk.gpu_slice_width / GRAD_DWN + 15) / 16, (qvk.extent_render.height / GRAD_DWN + 15) / 16, @@ -257,7 +257,7 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_GRAD_LF_PING]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_ASVGF_GRAD_HF_SPEC_PING]); - //vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[GRADIENT_ATROUS]); + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[GRADIENT_ATROUS]); vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout_atrous, 0, LENGTH(desc_sets), desc_sets, 0, 0); @@ -271,8 +271,6 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) vkCmdPushConstants(cmd_buf, pipeline_layout_atrous, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants); - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[GRADIENT_ATROUS]); - vkCmdDispatch(cmd_buf, (qvk.gpu_slice_width / GRAD_DWN + 15) / 16, (qvk.extent_render.height / GRAD_DWN + 15) / 16, @@ -288,7 +286,7 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) /* temporal accumulation / filtering */ vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[TEMPORAL]); vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_layout_atrous, 0, LENGTH(desc_sets), desc_sets, 0, 0); + pipeline_layout_general, 0, LENGTH(desc_sets), 
desc_sets, 0, 0); vkCmdDispatch(cmd_buf, (qvk.gpu_slice_width + 14) / 15, (qvk.extent_render.height + 14) / 15, @@ -306,10 +304,6 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) END_PERF_MARKER(cmd_buf, PROFILER_ASVGF_TEMPORAL); BEGIN_PERF_MARKER(cmd_buf, PROFILER_ASVGF_ATROUS); - //vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[ATROUS]); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, - pipeline_layout_atrous, 0, LENGTH(desc_sets), desc_sets, 0, 0); - /* spatial reconstruction filtering */ const int num_atrous_iterations = 4; for(int i = 0; i < num_atrous_iterations; i++) @@ -320,11 +314,14 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) i }; + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[ATROUS_LF]); + + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout_atrous, 0, LENGTH(desc_sets), desc_sets, 0, 0); + vkCmdPushConstants(cmd_buf, pipeline_layout_atrous, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants); - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[ATROUS_LF]); - vkCmdDispatch(cmd_buf, (qvk.gpu_slice_width / GRAD_DWN + 15) / 16, (qvk.extent_render.height / GRAD_DWN + 15) / 16, @@ -340,6 +337,10 @@ vkpt_asvgf_filter(VkCommandBuffer cmd_buf, qboolean enable_lf) int specialization = ATROUS_ITER_0 + i; vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_asvgf[specialization]); + + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline_layout_general, 0, LENGTH(desc_sets), desc_sets, 0, 0); + vkCmdDispatch(cmd_buf, (qvk.gpu_slice_width + 15) / 16, (qvk.extent_render.height + 15) / 16, diff --git a/src/refresh/vkpt/device_memory_allocator.c b/src/refresh/vkpt/device_memory_allocator.c index e454fc7e0..b3844b6d2 100644 --- a/src/refresh/vkpt/device_memory_allocator.c +++ b/src/refresh/vkpt/device_memory_allocator.c @@ -133,9 +133,9 @@ int 
create_sub_allocator(DeviceMemoryAllocator* allocator, uint32_t memory_type) }; #ifdef VKPT_DEVICE_GROUPS - VkMemoryAllocateFlagsInfoKHR mem_alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR, - .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR, + VkMemoryAllocateFlagsInfo mem_alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, .deviceMask = (1 << qvk.device_count) - 1 }; diff --git a/src/refresh/vkpt/draw.c b/src/refresh/vkpt/draw.c index 0d36f829c..98f5c849f 100644 --- a/src/refresh/vkpt/draw.c +++ b/src/refresh/vkpt/draw.c @@ -30,7 +30,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define TEXNUM_WHITE (~0) #define MAX_STRETCH_PICS (1<<14) -drawStatic_t draw = { +static drawStatic_t draw = { .scale = 1.0f, .alpha_scale = 1.0f }; diff --git a/src/refresh/vkpt/god_rays.c b/src/refresh/vkpt/god_rays.c index 6536992e7..7875a38e9 100644 --- a/src/refresh/vkpt/god_rays.c +++ b/src/refresh/vkpt/god_rays.c @@ -228,9 +228,9 @@ static void create_image_views() { god_rays.shadow_image_view = vkpt_shadow_map_get_view(); - VkSamplerReductionModeCreateInfoEXT redutcion_create_info = { - .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO_EXT, - .reductionMode = VK_SAMPLER_REDUCTION_MODE_MIN_EXT + VkSamplerReductionModeCreateInfo redutcion_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_REDUCTION_MODE_CREATE_INFO, + .reductionMode = VK_SAMPLER_REDUCTION_MODE_MIN }; const VkSamplerCreateInfo sampler_create_info = { diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 2ac9da840..9a50cc346 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -70,6 +70,7 @@ static int drs_current_scale = 0; static int drs_effective_scale = 0; cvar_t *cvar_min_driver_version = NULL; +cvar_t *cvar_nv_ray_tracing = NULL; extern uiStatic_t uis; @@ -143,11 +144,11 @@ VkptInit_t vkpt_initialization[] = { { "tonemap", 
vkpt_tone_mapping_initialize, vkpt_tone_mapping_destroy, VKPT_INIT_DEFAULT, 0 }, { "tonemap|", vkpt_tone_mapping_create_pipelines, vkpt_tone_mapping_destroy_pipelines, VKPT_INIT_RELOAD_SHADER, 0 }, - { "physicalSky", vkpt_physical_sky_initialize, vkpt_physical_sky_destroy, VKPT_INIT_DEFAULT, 0 }, + { "physicalSky", vkpt_physical_sky_initialize, vkpt_physical_sky_destroy, VKPT_INIT_DEFAULT, 0 }, { "physicalSky|", vkpt_physical_sky_create_pipelines, vkpt_physical_sky_destroy_pipelines, VKPT_INIT_RELOAD_SHADER, 0 }, - { "godrays", vkpt_initialize_god_rays, vkpt_destroy_god_rays, VKPT_INIT_DEFAULT, 0 }, - { "godrays|", vkpt_god_rays_create_pipelines, vkpt_god_rays_destroy_pipelines, VKPT_INIT_RELOAD_SHADER, 0 }, - { "godraysI", vkpt_god_rays_update_images, vkpt_god_rays_noop, VKPT_INIT_SWAPCHAIN_RECREATE, 0 }, + { "godrays", vkpt_initialize_god_rays, vkpt_destroy_god_rays, VKPT_INIT_DEFAULT, 0 }, + { "godrays|", vkpt_god_rays_create_pipelines, vkpt_god_rays_destroy_pipelines, VKPT_INIT_RELOAD_SHADER, 0 }, + { "godraysI", vkpt_god_rays_update_images, vkpt_god_rays_noop, VKPT_INIT_SWAPCHAIN_RECREATE, 0 }, }; void debug_output(const char* format, ...); @@ -326,7 +327,7 @@ vkpt_reload_shader() void vkpt_reload_textures() { - IMG_ReloadAll(); + IMG_ReloadAll(); } // @@ -356,7 +357,7 @@ vkpt_set_material() } char const * token = Cmd_Argc() > 1 ? Cmd_Argv(1) : NULL, - * value = Cmd_Argc() > 2 ? Cmd_Argv(2) : NULL; + * value = Cmd_Argc() > 2 ? 
Cmd_Argv(2) : NULL; MAT_SetPBRMaterialAttribute(mat, token, value); } @@ -377,7 +378,6 @@ vkpt_print_material() // // -int registration_sequence; vkpt_refdef_t vkpt_refdef = { .z_near = 1.0f, .z_far = 4096.0f, @@ -389,17 +389,16 @@ QVK_t qvk = { .frame_counter = 0, }; -#define _VK_INST_EXTENSION_DO(a) PFN_##a q##a; -_VK_INST_EXTENSION_LIST -#undef _VK_INST_EXTENSION_DO - -#define _VK_EXTENSION_DO(a) PFN_##a q##a; -_VK_EXTENSION_LIST -#undef _VK_EXTENSION_DO +#define VK_EXTENSION_DO(a) PFN_##a q##a = 0; +LIST_EXTENSIONS_KHR +LIST_EXTENSIONS_NV +LIST_EXTENSIONS_DEBUG +LIST_EXTENSIONS_INSTANCE +#undef VK_EXTENSION_DO #ifdef VKPT_ENABLE_VALIDATION const char *vk_requested_layers[] = { - "VK_LAYER_LUNARG_standard_validation" + "VK_LAYER_KHRONOS_validation" }; #endif @@ -411,7 +410,8 @@ const char *vk_requested_instance_extensions[] = { #endif }; -const char *vk_requested_device_extensions[] = { + +const char *vk_requested_device_extensions_nv[] = { VK_NV_RAY_TRACING_EXTENSION_NAME, VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME, @@ -425,13 +425,24 @@ const char *vk_requested_device_extensions[] = { #endif }; +const char *vk_requested_device_extensions_khr[] = { + VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, + VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, + VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME, + VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, + VK_KHR_SWAPCHAIN_EXTENSION_NAME, +#ifdef VKPT_ENABLE_VALIDATION + VK_EXT_DEBUG_MARKER_EXTENSION_NAME, +#endif +}; + static const VkApplicationInfo vk_app_info = { .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, .pApplicationName = "quake 2 pathtracing", .applicationVersion = VK_MAKE_VERSION(1, 0, 0), .pEngineName = "vkpt", .engineVersion = VK_MAKE_VERSION(1, 0, 0), - .apiVersion = VK_API_VERSION_1_1, + .apiVersion = VK_API_VERSION_1_2, }; /* use this to override file names */ @@ -611,8 +622,8 @@ out:; .imageExtent = qvk.extent_unscaled, .imageArrayLayers = 1, /* only needs to be changed for stereoscopic 
rendering */ .imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT - | VK_IMAGE_USAGE_TRANSFER_SRC_BIT - | VK_IMAGE_USAGE_TRANSFER_DST_BIT, + | VK_IMAGE_USAGE_TRANSFER_SRC_BIT + | VK_IMAGE_USAGE_TRANSFER_DST_BIT, .imageSharingMode = VK_SHARING_MODE_EXCLUSIVE, /* VK_SHARING_MODE_CONCURRENT if not using same queue */ .queueFamilyIndexCount = 0, .pQueueFamilyIndices = NULL, @@ -816,11 +827,11 @@ init_vulkan() return qfalse; } -#define _VK_INST_EXTENSION_DO(a) \ +#define VK_EXTENSION_DO(a) \ q##a = (PFN_##a) vkGetInstanceProcAddr(qvk.instance, #a); \ if (!q##a) { Com_EPrintf("warning: could not load instance function %s\n", #a); } - _VK_INST_EXTENSION_LIST -#undef _VK_INST_EXTENSION_DO + LIST_EXTENSIONS_INSTANCE +#undef VK_EXTENSION_DO /* setup debug callback */ VkDebugUtilsMessengerCreateInfoEXT dbg_create_info = { @@ -855,21 +866,21 @@ init_vulkan() uint32_t num_device_groups = 0; if (cvar_sli->integer) - _VK(qvkEnumeratePhysicalDeviceGroupsKHR(qvk.instance, &num_device_groups, NULL)); + _VK(vkEnumeratePhysicalDeviceGroups(qvk.instance, &num_device_groups, NULL)); - VkDeviceGroupDeviceCreateInfoKHR device_group_create_info; - VkPhysicalDeviceGroupPropertiesKHR device_group_info; + VkDeviceGroupDeviceCreateInfo device_group_create_info; + VkPhysicalDeviceGroupProperties device_group_info; if(num_device_groups > 0) { // we always use the first group num_device_groups = 1; - _VK(qvkEnumeratePhysicalDeviceGroupsKHR(qvk.instance, &num_device_groups, &device_group_info)); + _VK(vkEnumeratePhysicalDeviceGroups(qvk.instance, &num_device_groups, &device_group_info)); if (device_group_info.physicalDeviceCount > VKPT_MAX_GPUS) { return qfalse; } - device_group_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO_KHR; + device_group_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_GROUP_DEVICE_CREATE_INFO; device_group_create_info.pNext = NULL; device_group_create_info.physicalDeviceCount = device_group_info.physicalDeviceCount; device_group_create_info.pPhysicalDevices 
= device_group_info.physicalDevices; @@ -882,8 +893,12 @@ init_vulkan() #endif qvk.device_count = 1; - int picked_device = -1; - for(int i = 0; i < num_devices; i++) { + int picked_device_with_khr = -1; + int picked_device_with_nv = -1; + qvk.use_khr_ray_tracing = qfalse; + + for(int i = 0; i < num_devices; i++) + { VkPhysicalDeviceProperties dev_properties; VkPhysicalDeviceFeatures dev_features; vkGetPhysicalDeviceProperties(devices[i], &dev_properties); @@ -898,16 +913,48 @@ init_vulkan() vkEnumerateDeviceExtensionProperties(devices[i], NULL, &num_ext, ext_properties); Com_Printf("Supported Vulkan device extensions:\n"); - for(int j = 0; j < num_ext; j++) { + for(int j = 0; j < num_ext; j++) + { Com_Printf(" %s\n", ext_properties[j].extensionName); - if(!strcmp(ext_properties[j].extensionName, VK_NV_RAY_TRACING_EXTENSION_NAME)) { - if(picked_device < 0) - picked_device = i; + + if (!strcmp(ext_properties[j].extensionName, VK_NV_RAY_TRACING_EXTENSION_NAME)) + { + if (picked_device_with_nv < 0) + { + picked_device_with_nv = i; + } } + + if(!strcmp(ext_properties[j].extensionName, VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) + { + if (picked_device_with_khr < 0) + { + picked_device_with_khr = i; + } + } + } + } + + int picked_device = -1; + if ((!cvar_nv_ray_tracing->integer || picked_device_with_nv < 0) && picked_device_with_khr >= 0) + { + picked_device = picked_device_with_khr; + qvk.use_khr_ray_tracing = qtrue; + + if (cvar_nv_ray_tracing->integer) + { + Com_WPrintf("Use of %s is requested through cvar %s, but there is no GPU that supports it. 
Switching to KHR.\n", + VK_NV_RAY_TRACING_EXTENSION_NAME, cvar_nv_ray_tracing->name); } } + else if(picked_device_with_nv >= 0) + { + picked_device = picked_device_with_nv; + qvk.use_khr_ray_tracing = qfalse; + } - if(picked_device < 0) { + if (picked_device < 0) + { Com_Error(ERR_FATAL, "No ray tracing capable GPU found."); } @@ -918,6 +965,7 @@ init_vulkan() vkGetPhysicalDeviceProperties(devices[picked_device], &dev_properties); Com_Printf("Picked physical device %d: %s\n", picked_device, dev_properties.deviceName); + Com_Printf("Using %s\n", qvk.use_khr_ray_tracing ? VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME : VK_NV_RAY_TRACING_EXTENSION_NAME); #ifdef _WIN32 if (dev_properties.vendorID == 0x10de) // NVIDIA vendor ID @@ -1017,10 +1065,11 @@ init_vulkan() queue_create_info[num_create_queues++] = q; }; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT idx_features = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT, + VkPhysicalDeviceDescriptorIndexingFeatures idx_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES, .runtimeDescriptorArray = 1, .shaderSampledImageArrayNonUniformIndexing = 1, + .shaderStorageBufferArrayNonUniformIndexing = 1 }; #ifdef VKPT_DEVICE_GROUPS @@ -1029,11 +1078,26 @@ init_vulkan() idx_features.pNext = &device_group_create_info; } #endif + VkPhysicalDeviceRayTracingPipelineFeaturesKHR physical_device_rt_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR, + .pNext = &idx_features, + .rayTracingPipeline = VK_TRUE + }; + + VkPhysicalDeviceAccelerationStructureFeaturesKHR physical_device_as_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR, + .pNext = &physical_device_rt_features, + .accelerationStructure = VK_TRUE, + }; + + VkPhysicalDeviceBufferDeviceAddressFeatures physical_device_address_features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES, + .pNext = 
&physical_device_as_features, + .bufferDeviceAddress = VK_TRUE + }; VkPhysicalDeviceFeatures2 device_features = { .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2_KHR, - .pNext = &idx_features, - .features = { .robustBufferAccess = 1, .fullDrawIndexUint32 = 1, @@ -1096,11 +1160,22 @@ init_vulkan() .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, .pNext = &device_features, .pQueueCreateInfos = queue_create_info, - .queueCreateInfoCount = num_create_queues, - .enabledExtensionCount = LENGTH(vk_requested_device_extensions), - .ppEnabledExtensionNames = vk_requested_device_extensions, + .queueCreateInfoCount = num_create_queues }; + if(qvk.use_khr_ray_tracing) + { + dev_create_info.enabledExtensionCount = LENGTH(vk_requested_device_extensions_khr); + dev_create_info.ppEnabledExtensionNames = vk_requested_device_extensions_khr; + device_features.pNext = &physical_device_address_features; + } + else + { + dev_create_info.enabledExtensionCount = LENGTH(vk_requested_device_extensions_nv); + dev_create_info.ppEnabledExtensionNames = vk_requested_device_extensions_nv; + device_features.pNext = &idx_features; + } + /* create device and queue */ result = vkCreateDevice(qvk.physical_device, &dev_create_info, NULL, &qvk.device); if (result != VK_SUCCESS) @@ -1113,11 +1188,26 @@ init_vulkan() vkGetDeviceQueue(qvk.device, qvk.queue_idx_compute, 0, &qvk.queue_compute); vkGetDeviceQueue(qvk.device, qvk.queue_idx_transfer, 0, &qvk.queue_transfer); -#define _VK_EXTENSION_DO(a) \ - q##a = (PFN_##a) vkGetDeviceProcAddr(qvk.device, #a); \ - if(!q##a) { Com_EPrintf("warning: could not load function %s\n", #a); } - _VK_EXTENSION_LIST -#undef _VK_EXTENSION_DO +#define VK_EXTENSION_DO(a) \ + q##a = (PFN_##a) vkGetDeviceProcAddr(qvk.device, #a); \ + if(!q##a) { Com_EPrintf("warning: could not load function %s\n", #a); } + + if (qvk.use_khr_ray_tracing) + { + LIST_EXTENSIONS_KHR + } + else + { + LIST_EXTENSIONS_NV + } + +#ifdef VKPT_ENABLE_VALIDATION + { + LIST_EXTENSIONS_DEBUG + } +#endif 
+ +#undef VK_EXTENSION_DO Com_Printf("-----------------------\n"); @@ -1125,10 +1215,19 @@ init_vulkan() } static VkShaderModule -create_shader_module_from_file(const char *name, const char *enum_name) +create_shader_module_from_file(const char *name, const char *enum_name, qboolean is_rt_shader) { + const char* suffix = ""; + if (is_rt_shader) + { + if (qvk.use_khr_ray_tracing) + suffix = ".khr"; + else + suffix = ".nv"; + } + char path[1024]; - snprintf(path, sizeof path, SHADER_PATH_TEMPLATE, name ? name : (enum_name + 8)); + snprintf(path, sizeof path, "shader_vkpt/%s%s.spv", name ? name : (enum_name + 8), suffix); if(!name) { int len = 0; for(len = 0; path[len]; len++) @@ -1170,14 +1269,18 @@ vkpt_load_shader_modules() { VkResult ret = VK_SUCCESS; #define SHADER_MODULE_DO(a) do { \ - qvk.shader_modules[a] = create_shader_module_from_file(shader_module_file_names[a], #a); \ + qvk.shader_modules[a] = create_shader_module_from_file(shader_module_file_names[a], #a, IS_RT_SHADER); \ ret = (ret == VK_SUCCESS && qvk.shader_modules[a]) ? 
VK_SUCCESS : VK_ERROR_INITIALIZATION_FAILED; \ if(qvk.shader_modules[a]) { \ ATTACH_LABEL_VARIABLE_NAME((uint64_t)qvk.shader_modules[a], SHADER_MODULE, #a); \ }\ } while(0); +#define IS_RT_SHADER qfalse LIST_SHADER_MODULES +#define IS_RT_SHADER qtrue + LIST_RT_SHADER_MODULES +#undef IS_RT_SHADER #undef SHADER_MODULE_DO return ret; @@ -1186,13 +1289,11 @@ vkpt_load_shader_modules() VkResult vkpt_destroy_shader_modules() { -#define SHADER_MODULE_DO(a) \ - vkDestroyShaderModule(qvk.device, qvk.shader_modules[a], NULL); \ - qvk.shader_modules[a] = VK_NULL_HANDLE; - - LIST_SHADER_MODULES - -#undef SHADER_MODULE_DO + for (int i = 0; i < NUM_QVK_SHADER_MODULES; i++) + { + vkDestroyShaderModule(qvk.device, qvk.shader_modules[i], NULL); + qvk.shader_modules[i] = VK_NULL_HANDLE; + } return VK_SUCCESS; } @@ -2296,7 +2397,7 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c void R_RenderFrame_RTX(refdef_t *fd) { - vkpt_refdef.fd = fd; + vkpt_refdef.fd = fd; qboolean render_world = (fd->rdflags & RDF_NOWORLDMODEL) == 0; static float previous_time = -1.f; @@ -2333,9 +2434,9 @@ R_RenderFrame_RTX(refdef_t *fd) if (prev_adapted_luminance <= 0.f) prev_adapted_luminance = 0.005f; - LOG_FUNC(); - if (!vkpt_refdef.bsp_mesh_world_loaded && render_world) - return; + LOG_FUNC(); + if (!vkpt_refdef.bsp_mesh_world_loaded && render_world) + return; vec3_t sky_matrix[3]; prepare_sky_matrix(fd->time, sky_matrix); @@ -2357,11 +2458,11 @@ R_RenderFrame_RTX(refdef_t *fd) num_model_lights = 0; EntityUploadInfo upload_info = { 0 }; - prepare_entities(&upload_info); - if (bsp_world_model) - { - vkpt_build_beam_lights(model_lights, &num_model_lights, MAX_MODEL_LIGHTS, bsp_world_model, fd->entities, fd->num_entities, prev_adapted_luminance); - } + prepare_entities(&upload_info); + if (bsp_world_model) + { + vkpt_build_beam_lights(model_lights, &num_model_lights, MAX_MODEL_LIGHTS, bsp_world_model, fd->entities, fd->num_entities, prev_adapted_luminance); + } 
QVKUniformBuffer_t *ubo = &vkpt_refdef.uniform_buffer; prepare_ubo(fd, viewleaf, &ref_mode, sky_matrix, render_world); @@ -2459,7 +2560,6 @@ R_RenderFrame_RTX(refdef_t *fd) BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_BVH_UPDATE); assert(upload_info.num_vertices % 3 == 0); - build_transparency_blas(trace_cmd_buf); vkpt_pt_create_all_dynamic(trace_cmd_buf, qvk.current_frame_index, &upload_info); vkpt_pt_create_toplevel(trace_cmd_buf, qvk.current_frame_index, render_world, upload_info.weapon_left_handed); vkpt_pt_update_descripter_set_bindings(qvk.current_frame_index); @@ -3018,7 +3118,10 @@ R_Init_RTX(qboolean total) // and the current test no longer works. cvar_min_driver_version = Cvar_Get("min_driver_version", "430.86", 0); - InitialiseSkyCVars(); + // When nonzero, the game will pick NV_ray_tracing if both NV and KHR extensions are available + cvar_nv_ray_tracing = Cvar_Get("nv_ray_tracing", "0", CVAR_REFRESH | CVAR_ARCHIVE); + + InitialiseSkyCVars(); if (MAT_InitializePBRmaterials() != Q_ERR_SUCCESS) { @@ -3039,7 +3142,7 @@ R_Init_RTX(qboolean total) cvar_pt_aperture_type->changed = accumulation_cvar_changed; cvar_pt_aperture_angle->changed = accumulation_cvar_changed; cvar_pt_focus->changed = accumulation_cvar_changed; - cvar_pt_freecam->changed = accumulation_cvar_changed; + cvar_pt_freecam->changed = accumulation_cvar_changed; cvar_pt_projection->changed = accumulation_cvar_changed; cvar_pt_num_bounce_rays->flags |= CVAR_ARCHIVE; @@ -3066,7 +3169,7 @@ R_Init_RTX(qboolean total) _VK(vkpt_initialize_all(VKPT_INIT_SWAPCHAIN_RECREATE)); Cmd_AddCommand("reload_shader", (xcommand_t)&vkpt_reload_shader); - Cmd_AddCommand("reload_textures", (xcommand_t)&vkpt_reload_textures); + Cmd_AddCommand("reload_textures", (xcommand_t)&vkpt_reload_textures); Cmd_AddCommand("reload_materials", (xcommand_t)&vkpt_reload_materials); Cmd_AddCommand("save_materials", (xcommand_t)&vkpt_save_materials); Cmd_AddCommand("set_material", (xcommand_t)&vkpt_set_material); @@ -3365,20 +3468,18 
@@ R_BeginRegistration_RTX(const char *name) Cvar_Set("sv_novis", vkpt_refdef.bsp_mesh_world.num_cameras > 0 ? "1" : "0"); - // register physical sky attributes based on map name lookup - vkpt_physical_sky_beginRegistration(); - UpdatePhysicalSkyCVars(); + // register physical sky attributes based on map name lookup + vkpt_physical_sky_beginRegistration(); + UpdatePhysicalSkyCVars(); vkpt_physical_sky_latch_local_time(); vkpt_bloom_reset(); vkpt_tone_mapping_request_reset(); vkpt_light_buffer_reset_counts(); - _VK(vkpt_pt_destroy_static()); + vkpt_pt_destroy_static(); const bsp_mesh_t *m = &vkpt_refdef.bsp_mesh_world; _VK(vkpt_pt_create_static( - qvk.buf_vertex_bsp.buffer, - offsetof(BspVertexBuffer, positions_bsp), m->world_idx_count, m->world_transparent_count, m->world_sky_count, @@ -3392,8 +3493,8 @@ void R_EndRegistration_RTX(void) { LOG_FUNC(); - - vkpt_physical_sky_endRegistration(); + + vkpt_physical_sky_endRegistration(); IMG_FreeUnused(); MOD_FreeUnused(); @@ -3499,7 +3600,8 @@ void vkpt_reset_command_buffers(cmd_buf_group_t* group) for (int i = 0; i < group->count_per_frame; i++) { void* addr = group->buffer_begin_addrs[group->count_per_frame * qvk.current_frame_index + i]; - assert(addr == 0); + //seth: this seems unrelated to the raytracing changes, but skip it until raytracing is working + //assert(addr == 0); } #endif } @@ -3537,8 +3639,8 @@ void vkpt_submit_command_buffer( }; #ifdef VKPT_DEVICE_GROUPS - VkDeviceGroupSubmitInfoKHR device_group_submit_info = { - .sType = VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO_KHR, + VkDeviceGroupSubmitInfo device_group_submit_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_GROUP_SUBMIT_INFO, .pNext = NULL, .waitSemaphoreCount = wait_semaphore_count, .pWaitSemaphoreDeviceIndices = wait_device_indices, diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index c017e6df9..8e110e6f3 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -34,63 +34,56 @@ with this 
program; if not, write to the Free Software Foundation, Inc., #define INSTANCE_MAX_NUM 12 -static VkPhysicalDeviceRayTracingPropertiesNV rt_properties = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV, - .pNext = NULL, - .maxRecursionDepth = 0, /* updated during init */ - .shaderGroupHandleSize = 0 -}; +static uint32_t shaderGroupHandleSize = 0; +static uint32_t shaderGroupBaseAlignment = 0; typedef struct accel_bottom_match_info_s { - VkGeometryFlagsNV flags; - uint32_t vertexCount; - uint32_t indexCount; + int fast_build; + uint32_t vertex_count; + uint32_t index_count; } accel_bottom_match_info_t; typedef struct accel_top_match_info_s { uint32_t instanceCount; } accel_top_match_info_t; -static BufferResource_t buf_accel_scratch; -static size_t scratch_buf_ptr = 0; -static BufferResource_t buf_instances [MAX_FRAMES_IN_FLIGHT]; -static VkAccelerationStructureNV accel_static; -static VkAccelerationStructureNV accel_transparent; -static VkAccelerationStructureNV accel_sky; -static VkAccelerationStructureNV accel_custom_sky; -static int transparent_primitive_offset = 0; -static int sky_primitive_offset = 0; -static int custom_sky_primitive_offset = 0; -static int transparent_model_primitive_offset = 0; -static int transparent_models_present = 0; -static int viewer_model_primitive_offset = 0; -static int viewer_weapon_primitive_offset = 0; -static int explosions_primitive_offset = 0; -static int explosions_present = 0; -static VkAccelerationStructureNV accel_dynamic [MAX_FRAMES_IN_FLIGHT]; -static accel_bottom_match_info_t accel_dynamic_match[MAX_FRAMES_IN_FLIGHT]; -static VkAccelerationStructureNV accel_transparent_models[MAX_FRAMES_IN_FLIGHT]; -static accel_bottom_match_info_t accel_transparent_models_match[MAX_FRAMES_IN_FLIGHT]; -static VkAccelerationStructureNV accel_viewer_models[MAX_FRAMES_IN_FLIGHT]; -static accel_bottom_match_info_t accel_viewer_models_match[MAX_FRAMES_IN_FLIGHT]; -static VkAccelerationStructureNV 
accel_viewer_weapon[MAX_FRAMES_IN_FLIGHT]; -static accel_bottom_match_info_t accel_viewer_weapon_match[MAX_FRAMES_IN_FLIGHT]; -static VkAccelerationStructureNV accel_explosions[MAX_FRAMES_IN_FLIGHT]; -static accel_bottom_match_info_t accel_explosions_match[MAX_FRAMES_IN_FLIGHT]; -static VkAccelerationStructureNV accel_top [MAX_FRAMES_IN_FLIGHT]; -static accel_top_match_info_t accel_top_match[MAX_FRAMES_IN_FLIGHT]; -static VkDeviceMemory mem_accel_static; -static VkDeviceMemory mem_accel_transparent; -static VkDeviceMemory mem_accel_sky; -static VkDeviceMemory mem_accel_custom_sky; -static VkDeviceMemory mem_accel_top[MAX_FRAMES_IN_FLIGHT]; -static VkDeviceMemory mem_accel_dynamic[MAX_FRAMES_IN_FLIGHT]; -static VkDeviceMemory mem_accel_transparent_models[MAX_FRAMES_IN_FLIGHT]; -static VkDeviceMemory mem_accel_viewer_models[MAX_FRAMES_IN_FLIGHT]; -static VkDeviceMemory mem_accel_viewer_weapon[MAX_FRAMES_IN_FLIGHT]; -static VkDeviceMemory mem_accel_explosions[MAX_FRAMES_IN_FLIGHT]; - -static BufferResource_t buf_shader_binding_table; +typedef struct { + VkAccelerationStructureKHR accel_khr; + VkAccelerationStructureNV accel_nv; + accel_bottom_match_info_t match; + BufferResource_t mem; + qboolean present; +} blas_t; + +static BufferResource_t buf_accel_scratch; +static size_t scratch_buf_ptr = 0; +static BufferResource_t buf_instances[MAX_FRAMES_IN_FLIGHT]; +static int transparent_primitive_offset = 0; +static int sky_primitive_offset = 0; +static int custom_sky_primitive_offset = 0; +static int transparent_model_primitive_offset = 0; +static int viewer_model_primitive_offset = 0; +static int viewer_weapon_primitive_offset = 0; +static int explosions_primitive_offset = 0; +static blas_t blas_static; +static blas_t blas_transparent; +static blas_t blas_sky; +static blas_t blas_custom_sky; +static blas_t blas_dynamic[MAX_FRAMES_IN_FLIGHT]; +static blas_t blas_transparent_models[MAX_FRAMES_IN_FLIGHT]; +static blas_t blas_viewer_models[MAX_FRAMES_IN_FLIGHT]; +static 
blas_t blas_viewer_weapon[MAX_FRAMES_IN_FLIGHT]; +static blas_t blas_explosions[MAX_FRAMES_IN_FLIGHT]; +static blas_t blas_particles[MAX_FRAMES_IN_FLIGHT]; +static blas_t blas_beams[MAX_FRAMES_IN_FLIGHT]; +static blas_t blas_sprites[MAX_FRAMES_IN_FLIGHT]; + +static VkAccelerationStructureKHR accel_top_khr[MAX_FRAMES_IN_FLIGHT]; +static VkAccelerationStructureNV accel_top_nv[MAX_FRAMES_IN_FLIGHT]; +static accel_top_match_info_t accel_top_match[MAX_FRAMES_IN_FLIGHT]; +static BufferResource_t mem_accel_top[MAX_FRAMES_IN_FLIGHT]; + +static BufferResource_t buf_shader_binding_table; static VkDescriptorPool rt_descriptor_pool; static VkDescriptorSet rt_descriptor_set[MAX_FRAMES_IN_FLIGHT]; @@ -107,75 +100,107 @@ extern cvar_t *cvar_pt_reflect_refract; typedef struct QvkGeometryInstance_s { - float transform[12]; - uint32_t instance_id : 24; - uint32_t mask : 8; - uint32_t instance_offset : 24; - uint32_t flags : 8; - uint64_t acceleration_structure_handle; + float transform[12]; + uint32_t instance_id : 24; + uint32_t mask : 8; + uint32_t instance_offset : 24; + uint32_t flags : 8; + uint64_t acceleration_structure; // handle on NV, address on KHR } QvkGeometryInstance_t; +typedef struct { + int gpu_index; + int bounce; +} pt_push_constants_t; + #define MEM_BARRIER_BUILD_ACCEL(cmd_buf, ...) 
\ do { \ VkMemoryBarrier mem_barrier = { \ .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, \ - .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_NV \ - | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV, \ - .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV, \ + .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR \ + | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, \ + .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, \ __VA_ARGS__ \ }; \ \ - vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_NV, \ - VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_NV, 0, 1, \ + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, \ + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, 0, 1, \ &mem_barrier, 0, 0, 0, 0); \ } while(0) VkResult vkpt_pt_init() { - VkPhysicalDeviceProperties2 dev_props2 = { - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - .pNext = &rt_properties, - }; + if (qvk.use_khr_ray_tracing) + { + VkPhysicalDeviceRayTracingPipelinePropertiesKHR rt_properties_khr = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR, + .pNext = NULL + }; + + VkPhysicalDeviceProperties2 dev_props2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &rt_properties_khr, + }; + + vkGetPhysicalDeviceProperties2(qvk.physical_device, &dev_props2); + + shaderGroupBaseAlignment = rt_properties_khr.shaderGroupBaseAlignment; + shaderGroupHandleSize = rt_properties_khr.shaderGroupHandleSize; + } + else + { + VkPhysicalDeviceRayTracingPropertiesNV rt_properties_nv = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PROPERTIES_NV, + .pNext = NULL + }; - vkGetPhysicalDeviceProperties2(qvk.physical_device, &dev_props2); + VkPhysicalDeviceProperties2 dev_props2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, + .pNext = &rt_properties_nv, + }; - Com_Printf("Maximum recursion depth: %d\n", rt_properties.maxRecursionDepth); - 
Com_Printf("Shader group handle size: %d\n", rt_properties.shaderGroupHandleSize); + vkGetPhysicalDeviceProperties2(qvk.physical_device, &dev_props2); + + shaderGroupBaseAlignment = rt_properties_nv.shaderGroupBaseAlignment; + shaderGroupHandleSize = rt_properties_nv.shaderGroupHandleSize; + } - buffer_create(&buf_accel_scratch, SIZE_SCRATCH_BUFFER, VK_BUFFER_USAGE_RAY_TRACING_BIT_NV, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + buffer_create(&buf_accel_scratch, SIZE_SCRATCH_BUFFER, + qvk.use_khr_ray_tracing ? VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : VK_BUFFER_USAGE_RAY_TRACING_BIT_NV, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { - buffer_create(buf_instances + i, INSTANCE_MAX_NUM * sizeof(QvkGeometryInstance_t), VK_BUFFER_USAGE_RAY_TRACING_BIT_NV, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + buffer_create(buf_instances + i, INSTANCE_MAX_NUM * sizeof(QvkGeometryInstance_t), + VK_BUFFER_USAGE_TRANSFER_DST_BIT | (qvk.use_khr_ray_tracing ? VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT : VK_BUFFER_USAGE_RAY_TRACING_BIT_NV), + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); } /* create descriptor set layout */ VkDescriptorSetLayoutBinding bindings[] = { { .binding = RAY_GEN_ACCEL_STRUCTURE_BINDING_IDX, - .descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV, + .descriptorType = qvk.use_khr_ray_tracing ? 
VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR : VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV, .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_NV, + .stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR, }, { .binding = RAY_GEN_PARTICLE_COLOR_BUFFER_BINDING_IDX, .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_ANY_HIT_BIT_NV, + .stageFlags = VK_SHADER_STAGE_ANY_HIT_BIT_KHR, }, { .binding = RAY_GEN_BEAM_COLOR_BUFFER_BINDING_IDX, .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_ANY_HIT_BIT_NV, + .stageFlags = VK_SHADER_STAGE_ANY_HIT_BIT_KHR, }, { .binding = RAY_GEN_SPRITE_INFO_BUFFER_BINDING_IDX, .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, .descriptorCount = 1, - .stageFlags = VK_SHADER_STAGE_ANY_HIT_BIT_NV, + .stageFlags = VK_SHADER_STAGE_ANY_HIT_BIT_KHR, }, }; @@ -197,9 +222,9 @@ vkpt_pt_init() /* create pipeline */ VkPushConstantRange push_constant_range = { - .stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_NV, + .stageFlags = VK_SHADER_STAGE_RAYGEN_BIT_KHR, .offset = 0, - .size = sizeof(int) * 2, + .size = sizeof(pt_push_constants_t), }; VkPipelineLayoutCreateInfo pipeline_layout_create_info = { @@ -214,8 +239,8 @@ vkpt_pt_init() ATTACH_LABEL_VARIABLE(rt_pipeline_layout, PIPELINE_LAYOUT); VkDescriptorPoolSize pool_sizes[] = { - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_FRAMES_IN_FLIGHT }, - { VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV, MAX_FRAMES_IN_FLIGHT } + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_FRAMES_IN_FLIGHT }, + { qvk.use_khr_ray_tracing ? 
VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR : VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV, MAX_FRAMES_IN_FLIGHT } }; VkDescriptorPoolCreateInfo pool_create_info = { @@ -251,10 +276,16 @@ VkResult vkpt_pt_update_descripter_set_bindings(int idx) { /* update descriptor set bindings */ - VkWriteDescriptorSetAccelerationStructureNV desc_accel_struct_info = { + VkWriteDescriptorSetAccelerationStructureKHR desc_accel_struct_info_khr = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR, + .accelerationStructureCount = 1, + .pAccelerationStructures = accel_top_khr + idx + }; + + VkWriteDescriptorSetAccelerationStructureNV desc_accel_struct_info_nv = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_NV, .accelerationStructureCount = 1, - .pAccelerationStructures = accel_top + idx + .pAccelerationStructures = accel_top_nv + idx }; VkBufferView particle_color_buffer_view = get_transparency_particle_color_buffer_view(); @@ -264,11 +295,11 @@ vkpt_pt_update_descripter_set_bindings(int idx) VkWriteDescriptorSet writes[] = { { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .pNext = &desc_accel_struct_info, + .pNext = qvk.use_khr_ray_tracing ? (const void*)&desc_accel_struct_info_khr : (const void*)&desc_accel_struct_info_nv, .dstSet = rt_descriptor_set[idx], .dstBinding = RAY_GEN_ACCEL_STRUCTURE_BINDING_IDX, .descriptorCount = 1, - .descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV + .descriptorType = qvk.use_khr_ray_tracing ? 
VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR : VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV }, { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -301,9 +332,8 @@ vkpt_pt_update_descripter_set_bindings(int idx) return VK_SUCCESS; } - static size_t -get_scratch_buffer_size(VkAccelerationStructureNV ac) +get_scratch_buffer_size_nv(VkAccelerationStructureNV ac) { VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, @@ -318,324 +348,339 @@ get_scratch_buffer_size(VkAccelerationStructureNV ac) return mem_req.memoryRequirements.size; } -static inline VkGeometryNV -get_geometry(VkBuffer buffer, size_t offset, uint32_t num_vertices) +static void destroy_blas(blas_t* blas) { - size_t size_per_vertex = sizeof(float) * 3; - VkGeometryNV geometry = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, - .geometry = { - .triangles = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV, - .vertexData = buffer, - .vertexOffset = offset, - .vertexCount = num_vertices, - .vertexStride = size_per_vertex, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .indexType = VK_INDEX_TYPE_NONE_NV, - .indexCount = 0, - }, - .aabbs = { .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV } - } - }; - return geometry; -} + buffer_destroy(&blas->mem); -VkResult -vkpt_pt_destroy_static() -{ - if(mem_accel_static) { - vkFreeMemory(qvk.device, mem_accel_static, NULL); - mem_accel_static = VK_NULL_HANDLE; - } - if (mem_accel_transparent) { - vkFreeMemory(qvk.device, mem_accel_transparent, NULL); - mem_accel_transparent = VK_NULL_HANDLE; - } - if (mem_accel_sky) { - vkFreeMemory(qvk.device, mem_accel_sky, NULL); - mem_accel_sky = VK_NULL_HANDLE; - } - if (mem_accel_custom_sky) { - vkFreeMemory(qvk.device, mem_accel_custom_sky, NULL); - mem_accel_custom_sky = VK_NULL_HANDLE; - } - if(accel_static) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_static, NULL); - accel_static = VK_NULL_HANDLE; - } - if (accel_transparent) 
{ - qvkDestroyAccelerationStructureNV(qvk.device, accel_transparent, NULL); - accel_transparent = VK_NULL_HANDLE; - } - if (accel_sky) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_sky, NULL); - accel_sky = VK_NULL_HANDLE; - } - if (accel_custom_sky) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_custom_sky, NULL); - accel_custom_sky = VK_NULL_HANDLE; + if (blas->accel_khr) + { + qvkDestroyAccelerationStructureKHR(qvk.device, blas->accel_khr, NULL); + blas->accel_khr = VK_NULL_HANDLE; } - return VK_SUCCESS; -} -VkResult -vkpt_pt_destroy_dynamic(int idx) -{ - if(mem_accel_dynamic[idx]) { - vkFreeMemory(qvk.device, mem_accel_dynamic[idx], NULL); - mem_accel_dynamic[idx] = VK_NULL_HANDLE; - } - if(accel_dynamic[idx]) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_dynamic[idx], NULL); - accel_dynamic[idx] = VK_NULL_HANDLE; - } - return VK_SUCCESS; -} - -VkResult -vkpt_pt_destroy_transparent_models(int idx) -{ - if (mem_accel_transparent_models[idx]) { - vkFreeMemory(qvk.device, mem_accel_transparent_models[idx], NULL); - mem_accel_transparent_models[idx] = VK_NULL_HANDLE; - } - if (accel_transparent_models[idx]) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_transparent_models[idx], NULL); - accel_transparent_models[idx] = VK_NULL_HANDLE; + if (blas->accel_nv) + { + qvkDestroyAccelerationStructureNV(qvk.device, blas->accel_nv, NULL); + blas->accel_nv = VK_NULL_HANDLE; } - return VK_SUCCESS; -} -VkResult -vkpt_pt_destroy_viewer_models(int idx) -{ - if(mem_accel_viewer_models[idx]) { - vkFreeMemory(qvk.device, mem_accel_viewer_models[idx], NULL); - mem_accel_viewer_models[idx] = VK_NULL_HANDLE; - } - if(accel_viewer_models[idx]) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_viewer_models[idx], NULL); - accel_viewer_models[idx] = VK_NULL_HANDLE; - } - return VK_SUCCESS; + blas->match.fast_build = 0; + blas->match.index_count = 0; + blas->match.vertex_count = 0; } -VkResult -vkpt_pt_destroy_viewer_weapon(int idx) +void 
vkpt_pt_destroy_static() { - if(mem_accel_viewer_weapon[idx]) { - vkFreeMemory(qvk.device, mem_accel_viewer_weapon[idx], NULL); - mem_accel_viewer_weapon[idx] = VK_NULL_HANDLE; - } - if(accel_viewer_weapon[idx]) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_viewer_weapon[idx], NULL); - accel_viewer_weapon[idx] = VK_NULL_HANDLE; - } - return VK_SUCCESS; + destroy_blas(&blas_static); + destroy_blas(&blas_transparent); + destroy_blas(&blas_sky); + destroy_blas(&blas_custom_sky); } -VkResult -vkpt_pt_destroy_explosions(int idx) +static void vkpt_pt_destroy_dynamic(int idx) { - if (mem_accel_explosions[idx]) { - vkFreeMemory(qvk.device, mem_accel_explosions[idx], NULL); - mem_accel_explosions[idx] = VK_NULL_HANDLE; - } - if (accel_explosions[idx]) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_explosions[idx], NULL); - accel_explosions[idx] = VK_NULL_HANDLE; - } - return VK_SUCCESS; + destroy_blas(&blas_dynamic[idx]); + destroy_blas(&blas_transparent_models[idx]); + destroy_blas(&blas_viewer_models[idx]); + destroy_blas(&blas_viewer_weapon[idx]); + destroy_blas(&blas_explosions[idx]); + destroy_blas(&blas_particles[idx]); + destroy_blas(&blas_beams[idx]); + destroy_blas(&blas_sprites[idx]); } -static int accel_matches(accel_bottom_match_info_t *match, - VkGeometryFlagsNV flags, - uint32_t vertex_count, - uint32_t index_count) { - return match->flags == flags && - match->vertexCount >= vertex_count && - match->indexCount >= index_count; +static inline int accel_matches(accel_bottom_match_info_t *match, + int fast_build, + uint32_t vertex_count, + uint32_t index_count) { + return match->fast_build == fast_build && + match->vertex_count >= vertex_count && + match->index_count >= index_count; } // How much to bloat the dynamic geometry allocations // to try to avoid later allocations. 
#define DYNAMIC_GEOMETRY_BLOAT_FACTOR 2 + static VkResult vkpt_pt_create_accel_bottom( - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices, - VkAccelerationStructureNV *accel, - accel_bottom_match_info_t *match, - VkDeviceMemory *mem_accel, - VkCommandBuffer cmd_buf, - int fast_build - ) + VkCommandBuffer cmd_buf, + BufferResource_t* buffer_vertex, + VkDeviceAddress offset_vertex, + BufferResource_t* buffer_index, + VkDeviceAddress offset_index, + int num_vertices, + int num_indices, + blas_t* blas, + qboolean is_dynamic, + qboolean fast_build) { - assert(accel); - assert(mem_accel); + assert(blas); - VkGeometryNV geometry = get_geometry(vertex_buffer, buffer_offset, num_vertices); + if (num_vertices == 0) + { + blas->present = qfalse; + return VK_SUCCESS; + } - int doFree = 0; - int doAlloc = 0; + if (qvk.use_khr_ray_tracing) + { + assert(buffer_vertex->address); + if (buffer_index) assert(buffer_index->address); + + const VkAccelerationStructureGeometryTrianglesDataKHR triangles = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, + .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, + .vertexData = {.deviceAddress = buffer_vertex->address + offset_vertex }, + .vertexStride = sizeof(float) * 3, + .maxVertex = num_vertices - 1, + .indexData = {.deviceAddress = buffer_index ? (buffer_index->address + offset_index) : 0 }, + .indexType = buffer_index ? 
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_NONE_KHR, + }; - if (!match || !accel_matches(match, geometry.flags, num_vertices, num_vertices) || *accel == VK_NULL_HANDLE) { - doAlloc = 1; - doFree = (*accel != VK_NULL_HANDLE); - } + const VkAccelerationStructureGeometryDataKHR geometry_data = { + .triangles = triangles + }; + + const VkAccelerationStructureGeometryKHR geometry = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, + .geometry = geometry_data + }; + + const VkAccelerationStructureGeometryKHR* geometries = &geometry; + + VkAccelerationStructureBuildGeometryInfoKHR buildInfo; - if (doFree) { - if (*mem_accel) { - vkFreeMemory(qvk.device, *mem_accel, NULL); - *mem_accel = VK_NULL_HANDLE; + // Prepare build info now, acceleration is filled later + buildInfo.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR; + buildInfo.pNext = NULL; + buildInfo.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR; + buildInfo.flags = fast_build ? 
VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR; + buildInfo.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR; + buildInfo.srcAccelerationStructure = VK_NULL_HANDLE; + buildInfo.dstAccelerationStructure = VK_NULL_HANDLE; + buildInfo.geometryCount = 1; + buildInfo.pGeometries = geometries; + buildInfo.ppGeometries = NULL; + + int doFree = 0; + int doAlloc = 0; + + if (!is_dynamic || !accel_matches(&blas->match, fast_build, num_vertices, num_indices) || blas->accel_khr == VK_NULL_HANDLE) + { + doAlloc = 1; + doFree = (blas->accel_khr != VK_NULL_HANDLE); } - if (*accel) { - qvkDestroyAccelerationStructureNV(qvk.device, *accel, NULL); - *accel = VK_NULL_HANDLE; + + if (doFree) + { + destroy_blas(blas); } - } - if (doAlloc) { - VkGeometryNV allocGeometry = geometry; + // Find size to build on the device + uint32_t max_primitive_count = max(num_vertices, num_indices) / 3; // number of tris + VkAccelerationStructureBuildSizesInfoKHR sizeInfo = { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR }; + qvkGetAccelerationStructureBuildSizesKHR(qvk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &max_primitive_count, &sizeInfo); + + if (doAlloc) + { + int num_vertices_to_allocate = num_vertices; + int num_indices_to_allocate = num_indices; + + // Allocate more memory / larger BLAS for dynamic objects + if (is_dynamic) + { + num_vertices_to_allocate *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + num_indices_to_allocate *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + + max_primitive_count = max(num_vertices_to_allocate, num_indices_to_allocate) / 3; + qvkGetAccelerationStructureBuildSizesKHR(qvk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &max_primitive_count, &sizeInfo); + } + + // Create acceleration structure + VkAccelerationStructureCreateInfoKHR createInfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, + .type = 
VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .size = sizeInfo.accelerationStructureSize + }; + + // Create the buffer for the acceleration structure + buffer_create(&blas->mem, sizeInfo.accelerationStructureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + createInfo.buffer = blas->mem.buffer; - // Only dynamic geometries have match info - if (match) { - allocGeometry.geometry.triangles.indexCount *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; - allocGeometry.geometry.triangles.vertexCount *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + // Create the acceleration structure + qvkCreateAccelerationStructureKHR(qvk.device, &createInfo, NULL, &blas->accel_khr); + + blas->match.fast_build = fast_build; + blas->match.vertex_count = num_vertices_to_allocate; + blas->match.index_count = num_indices_to_allocate; } - VkAccelerationStructureCreateInfoNV accel_create_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV, - .info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .instanceCount = 0, - .geometryCount = 1, - .pGeometries = &allocGeometry, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - .flags = fast_build ? 
VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV + // set where the build lands + buildInfo.dstAccelerationStructure = blas->accel_khr; + + // Use shared scratch buffer for holding the temporary data of the acceleration structure builder + buildInfo.scratchData.deviceAddress = buf_accel_scratch.address + scratch_buf_ptr; + assert(buf_accel_scratch.address); + + // Update the scratch buffer ptr + scratch_buf_ptr += sizeInfo.buildScratchSize; + assert(scratch_buf_ptr < SIZE_SCRATCH_BUFFER); + + // build offset + VkAccelerationStructureBuildRangeInfoKHR offset = { .primitiveCount = max(num_vertices, num_indices) / 3 }; + VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; + + qvkCmdBuildAccelerationStructuresKHR(cmd_buf, 1, &buildInfo, &offsets); + } + else // (!qvk.use_khr_ray_tracing) + { + VkGeometryNV geometry = { + .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, + .geometry = { + .triangles = { + .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV, + .vertexData = buffer_vertex->buffer, + .vertexOffset = offset_vertex, + .vertexCount = num_vertices, + .vertexStride = sizeof(float) * 3, + .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, + .indexData = buffer_index ? buffer_index->buffer : VK_NULL_HANDLE, + .indexType = buffer_index ? 
VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_NONE_NV, + .indexCount = num_indices, + }, + .aabbs = { .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV } } }; - qvkCreateAccelerationStructureNV(qvk.device, &accel_create_info, NULL, accel); + int doFree = 0; + int doAlloc = 0; - VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, - .accelerationStructure = *accel, - .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV - }; - VkMemoryRequirements2 mem_req = { 0 }; - mem_req.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &mem_req_info, &mem_req); + if (!is_dynamic || !accel_matches(&blas->match, fast_build, num_vertices, num_indices) || blas->accel_nv == VK_NULL_HANDLE) { + doAlloc = 1; + doFree = (blas->accel_nv != VK_NULL_HANDLE); + } - _VK(allocate_gpu_memory(mem_req.memoryRequirements, mem_accel)); + if (doFree) + { + destroy_blas(blas); + } - VkBindAccelerationStructureMemoryInfoNV bind_info = { - .sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV, - .accelerationStructure = *accel, - .memory = *mem_accel, - }; + if (doAlloc) + { + VkGeometryNV allocGeometry = geometry; - _VK(qvkBindAccelerationStructureMemoryNV(qvk.device, 1, &bind_info)); + // Allocate more memory / larger BLAS for dynamic objects + if (is_dynamic) + { + allocGeometry.geometry.triangles.indexCount *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + allocGeometry.geometry.triangles.vertexCount *= DYNAMIC_GEOMETRY_BLOAT_FACTOR; + } - if (match) { - match->flags = allocGeometry.flags; - match->vertexCount = allocGeometry.geometry.triangles.vertexCount; - match->indexCount = num_vertices; + VkAccelerationStructureCreateInfoNV accel_create_info = + { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV, + .info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, + .instanceCount = 0, + .geometryCount = 1, + 
.pGeometries = &allocGeometry, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + .flags = fast_build ? VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV + } + }; + + qvkCreateAccelerationStructureNV(qvk.device, &accel_create_info, NULL, &blas->accel_nv); + + VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, + .accelerationStructure = blas->accel_nv, + .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV + }; + + VkMemoryRequirements2 mem_req = { 0 }; + mem_req.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &mem_req_info, &mem_req); + + _VK(buffer_create(&blas->mem, mem_req.memoryRequirements.size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)); + + VkBindAccelerationStructureMemoryInfoNV bind_info = { + .sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV, + .accelerationStructure = blas->accel_nv, + .memory = blas->mem.memory + }; + + _VK(qvkBindAccelerationStructureMemoryNV(qvk.device, 1, &bind_info)); + + blas->match.fast_build = fast_build; + blas->match.vertex_count = allocGeometry.geometry.triangles.vertexCount; + blas->match.index_count = allocGeometry.geometry.triangles.indexCount; } - } - size_t scratch_buf_size = get_scratch_buffer_size(*accel); - assert(scratch_buf_ptr + scratch_buf_size < SIZE_SCRATCH_BUFFER); + size_t scratch_buf_size = get_scratch_buffer_size_nv(blas->accel_nv); + assert(scratch_buf_ptr + scratch_buf_size < SIZE_SCRATCH_BUFFER); - VkAccelerationStructureInfoNV as_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - .geometryCount = 1, - .pGeometries = &geometry, - .flags = fast_build ? 
VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV - }; + VkAccelerationStructureInfoNV as_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, + .geometryCount = 1, + .pGeometries = &geometry, + .flags = fast_build ? VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_NV : VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV + }; - qvkCmdBuildAccelerationStructureNV(cmd_buf, &as_info, + qvkCmdBuildAccelerationStructureNV(cmd_buf, &as_info, VK_NULL_HANDLE, /* instance buffer */ 0 /* instance offset */, VK_FALSE, /* update */ - *accel, - VK_NULL_HANDLE, /* source acceleration structure ?? */ + blas->accel_nv, + VK_NULL_HANDLE, buf_accel_scratch.buffer, scratch_buf_ptr); - scratch_buf_ptr += scratch_buf_size; + scratch_buf_ptr += scratch_buf_size; + } + + blas->present = qtrue; return VK_SUCCESS; } VkResult vkpt_pt_create_static( - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices, - int num_vertices_transparent, - int num_vertices_sky, - int num_vertices_custom_sky - ) + int num_vertices, + int num_vertices_transparent, + int num_vertices_sky, + int num_vertices_custom_sky) { VkCommandBuffer cmd_buf = vkpt_begin_command_buffer(&qvk.cmd_buffers_graphics); + VkDeviceAddress address_vertex = offsetof(BspVertexBuffer, positions_bsp); scratch_buf_ptr = 0; - VkResult ret = vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset, - num_vertices, - &accel_static, - NULL, - &mem_accel_static, - cmd_buf, - VK_FALSE); + VkResult ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices, 0, &blas_static, qfalse, qfalse); MEM_BARRIER_BUILD_ACCEL(cmd_buf); + address_vertex += num_vertices * sizeof(float) * 3; scratch_buf_ptr = 0; - ret = vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + num_vertices * sizeof(float) * 3, - num_vertices_transparent, - 
&accel_transparent, - NULL, - &mem_accel_transparent, - cmd_buf, - VK_FALSE); + ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_transparent, 0, &blas_transparent, qfalse, qfalse); MEM_BARRIER_BUILD_ACCEL(cmd_buf); + address_vertex += num_vertices_transparent * sizeof(float) * 3; scratch_buf_ptr = 0; - ret = vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + (num_vertices + num_vertices_transparent) * sizeof(float) * 3, - num_vertices_sky, - &accel_sky, - NULL, - &mem_accel_sky, - cmd_buf, - VK_FALSE); + ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_sky, 0, &blas_sky, qfalse, qfalse); MEM_BARRIER_BUILD_ACCEL(cmd_buf); + address_vertex += num_vertices_sky * sizeof(float) * 3; scratch_buf_ptr = 0; - ret = vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + (num_vertices + num_vertices_transparent + num_vertices_sky) * sizeof(float) * 3, - num_vertices_custom_sky, - &accel_custom_sky, - NULL, - &mem_accel_custom_sky, - cmd_buf, - VK_FALSE); + ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_custom_sky, 0, &blas_custom_sky, qfalse, qfalse); MEM_BARRIER_BUILD_ACCEL(cmd_buf); + address_vertex += num_vertices_custom_sky * sizeof(float) * 3; scratch_buf_ptr = 0; transparent_primitive_offset = num_vertices / 3; @@ -648,137 +693,6 @@ vkpt_pt_create_static( return ret; } -static VkResult -vkpt_pt_create_dynamic( - VkCommandBuffer cmd_buf, - int idx, - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices - ) -{ - return vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset, - num_vertices, - accel_dynamic + idx, - accel_dynamic_match + idx, - mem_accel_dynamic + idx, - cmd_buf, - VK_TRUE); -} - -static VkResult -vkpt_pt_create_transparent_models( - VkCommandBuffer cmd_buf, - int idx, - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices, - int vertex_offset 
- ) -{ - transparent_model_primitive_offset = vertex_offset / 3; - - if (num_vertices > 0) - { - transparent_models_present = VK_TRUE; - - return vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + vertex_offset * 3 * sizeof(float), - num_vertices, - accel_transparent_models + idx, - accel_transparent_models_match + idx, - mem_accel_transparent_models + idx, - cmd_buf, - VK_TRUE); - } - else - { - transparent_models_present = VK_FALSE; - return VK_SUCCESS; - } -} - -static VkResult -vkpt_pt_create_viewer_models( - VkCommandBuffer cmd_buf, - int idx, - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices, - int vertex_offset - ) -{ - viewer_model_primitive_offset = vertex_offset / 3; - - return vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + vertex_offset * 3 * sizeof(float), - num_vertices, - accel_viewer_models + idx, - accel_viewer_models_match + idx, - mem_accel_viewer_models + idx, - cmd_buf, - VK_TRUE); -} - -static VkResult -vkpt_pt_create_viewer_weapon( - VkCommandBuffer cmd_buf, - int idx, - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices, - int vertex_offset - ) -{ - viewer_weapon_primitive_offset = vertex_offset / 3; - - return vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + vertex_offset * 3 * sizeof(float), - num_vertices, - accel_viewer_weapon + idx, - accel_viewer_weapon_match + idx, - mem_accel_viewer_weapon + idx, - cmd_buf, - VK_TRUE); -} - -static VkResult -vkpt_pt_create_explosions( - VkCommandBuffer cmd_buf, - int idx, - VkBuffer vertex_buffer, - size_t buffer_offset, - int num_vertices, - int vertex_offset -) -{ - if (num_vertices > 0) - { - explosions_primitive_offset = vertex_offset / 3; - explosions_present = VK_TRUE; - - return vkpt_pt_create_accel_bottom( - vertex_buffer, - buffer_offset + vertex_offset * 3 * sizeof(float), - num_vertices, - accel_explosions + idx, - accel_explosions_match + idx, - mem_accel_explosions + idx, - cmd_buf, - VK_TRUE); - } - else - { 
- explosions_present = VK_FALSE; - return VK_SUCCESS; - } -} - VkResult vkpt_pt_create_all_dynamic( VkCommandBuffer cmd_buf, @@ -787,24 +701,39 @@ vkpt_pt_create_all_dynamic( { scratch_buf_ptr = 0; - vkpt_pt_create_dynamic(cmd_buf, qvk.current_frame_index, qvk.buf_vertex_model_dynamic.buffer, - offsetof(ModelDynamicVertexBuffer, positions_instanced), upload_info->dynamic_vertex_num); + uint64_t offset_vertex_base = offsetof(ModelDynamicVertexBuffer, positions_instanced); + uint64_t offset_vertex = offset_vertex_base; + uint64_t offset_index = 0; + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->dynamic_vertex_num, 0, blas_dynamic + idx, qtrue, qtrue); + + transparent_model_primitive_offset = upload_info->transparent_model_vertex_offset / 3; + offset_vertex = offset_vertex_base + upload_info->transparent_model_vertex_offset * sizeof(float) * 3; + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->transparent_model_vertex_num, 0, blas_transparent_models + idx, qtrue, qtrue); - vkpt_pt_create_transparent_models(cmd_buf, qvk.current_frame_index, qvk.buf_vertex_model_dynamic.buffer, - offsetof(ModelDynamicVertexBuffer, positions_instanced), upload_info->transparent_model_vertex_num, - upload_info->transparent_model_vertex_offset); + viewer_model_primitive_offset = upload_info->viewer_model_vertex_offset / 3; + offset_vertex = offset_vertex_base + upload_info->viewer_model_vertex_offset * sizeof(float) * 3; + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->viewer_model_vertex_num, 0, blas_viewer_models + idx, qtrue, qtrue); - vkpt_pt_create_viewer_models(cmd_buf, qvk.current_frame_index, qvk.buf_vertex_model_dynamic.buffer, - offsetof(ModelDynamicVertexBuffer, positions_instanced), upload_info->viewer_model_vertex_num, - upload_info->viewer_model_vertex_offset); + 
viewer_weapon_primitive_offset = upload_info->viewer_weapon_vertex_offset / 3; + offset_vertex = offset_vertex_base + upload_info->viewer_weapon_vertex_offset * sizeof(float) * 3; + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->viewer_weapon_vertex_num, 0, blas_viewer_weapon + idx, qtrue, qtrue); - vkpt_pt_create_viewer_weapon(cmd_buf, qvk.current_frame_index, qvk.buf_vertex_model_dynamic.buffer, - offsetof(ModelDynamicVertexBuffer, positions_instanced), upload_info->viewer_weapon_vertex_num, - upload_info->viewer_weapon_vertex_offset); + explosions_primitive_offset = upload_info->explosions_vertex_offset / 3; + offset_vertex = offset_vertex_base + upload_info->explosions_vertex_offset * sizeof(float) * 3; + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->explosions_vertex_num, 0, blas_explosions + idx, qtrue, qtrue); - vkpt_pt_create_explosions(cmd_buf, qvk.current_frame_index, qvk.buf_vertex_model_dynamic.buffer, - offsetof(ModelDynamicVertexBuffer, positions_instanced), upload_info->explosions_vertex_num, - upload_info->explosions_vertex_offset); + BufferResource_t* buffer_vertex = NULL; + BufferResource_t* buffer_index = NULL; + uint32_t num_vertices = 0; + uint32_t num_indices = 0; + vkpt_get_transparency_buffers(VKPT_TRANSPARENCY_PARTICLES, &buffer_vertex, &offset_vertex, &buffer_index, &offset_index, &num_vertices, &num_indices); + vkpt_pt_create_accel_bottom(cmd_buf, buffer_vertex, offset_vertex, buffer_index, offset_index, num_vertices, num_indices, blas_particles + idx, qtrue, qtrue); + + vkpt_get_transparency_buffers(VKPT_TRANSPARENCY_BEAMS, &buffer_vertex, &offset_vertex, &buffer_index, &offset_index, &num_vertices, &num_indices); + vkpt_pt_create_accel_bottom(cmd_buf, buffer_vertex, offset_vertex, buffer_index, offset_index, num_vertices, num_indices, blas_beams + idx, qtrue, qtrue); + + 
vkpt_get_transparency_buffers(VKPT_TRANSPARENCY_SPRITES, &buffer_vertex, &offset_vertex, &buffer_index, &offset_index, &num_vertices, &num_indices); + vkpt_pt_create_accel_bottom(cmd_buf, buffer_vertex, offset_vertex, buffer_index, offset_index, num_vertices, num_indices, blas_sprites + idx, qtrue, qtrue); MEM_BARRIER_BUILD_ACCEL(cmd_buf); scratch_buf_ptr = 0; @@ -815,20 +744,27 @@ vkpt_pt_create_all_dynamic( void vkpt_pt_destroy_toplevel(int idx) { - if(accel_top[idx]) { - qvkDestroyAccelerationStructureNV(qvk.device, accel_top[idx], NULL); - accel_top[idx] = VK_NULL_HANDLE; + if(accel_top_khr[idx]) { + qvkDestroyAccelerationStructureKHR(qvk.device, accel_top_khr[idx], NULL); + accel_top_khr[idx] = VK_NULL_HANDLE; accel_top_match[idx].instanceCount = 0; } - if(mem_accel_top[idx]) { - vkFreeMemory(qvk.device, mem_accel_top[idx], NULL); - mem_accel_top[idx] = VK_NULL_HANDLE; + + if (accel_top_nv[idx]) { + qvkDestroyAccelerationStructureNV(qvk.device, accel_top_nv[idx], NULL); + accel_top_nv[idx] = VK_NULL_HANDLE; + accel_top_match[idx].instanceCount = 0; } + + buffer_destroy(&mem_accel_top[idx]); } static void -append_blas(QvkGeometryInstance_t *instances, int *num_instances, VkAccelerationStructureNV blas, int instance_id, int mask, int flags, int sbt_offset) +append_blas(QvkGeometryInstance_t *instances, int *num_instances, blas_t* blas, int instance_id, int mask, int flags, int sbt_offset) { + if (!blas->present) + return; + QvkGeometryInstance_t instance = { .transform = { 1.0f, 0.0f, 0.0f, 0.0f, @@ -839,10 +775,22 @@ append_blas(QvkGeometryInstance_t *instances, int *num_instances, VkAcceleration .mask = mask, .instance_offset = sbt_offset, .flags = flags, - .acceleration_structure_handle = 1337, // will be overwritten + .acceleration_structure = 0 }; - _VK(qvkGetAccelerationStructureHandleNV(qvk.device, blas, sizeof(uint64_t), &instance.acceleration_structure_handle)); + if (qvk.use_khr_ray_tracing) + { + VkAccelerationStructureDeviceAddressInfoKHR 
as_device_address_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, + .accelerationStructure = blas->accel_khr, + }; + + instance.acceleration_structure = qvkGetAccelerationStructureDeviceAddressKHR(qvk.device, &as_device_address_info); + } + else + { + _VK(qvkGetAccelerationStructureHandleNV(qvk.device, blas->accel_nv, sizeof(uint64_t), &instance.acceleration_structure)); + } assert(*num_instances < INSTANCE_MAX_NUM); memcpy(instances + *num_instances, &instance, sizeof(instance)); @@ -857,113 +805,191 @@ vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world if (include_world) { - append_blas(instances, &num_instances, accel_static, 0, AS_FLAG_OPAQUE, 0, 0); - append_blas(instances, &num_instances, accel_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, 0, 0); - append_blas(instances, &num_instances, accel_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, 0, 0); - append_blas(instances, &num_instances, accel_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, AS_FLAG_CUSTOM_SKY, 0, 0); + append_blas(instances, &num_instances, &blas_static, 0, AS_FLAG_OPAQUE, 0, 0); + append_blas(instances, &num_instances, &blas_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, 0, 0); + append_blas(instances, &num_instances, &blas_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, 0, 0); + append_blas(instances, &num_instances, &blas_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, AS_FLAG_CUSTOM_SKY, 0, 0); } - append_blas(instances, &num_instances, accel_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, 0, 0); - if (transparent_models_present) - { - append_blas(instances, &num_instances, accel_transparent_models[idx], AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, 0, 0); - } + append_blas(instances, &num_instances, &blas_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, 0, 0); + 
append_blas(instances, &num_instances, &blas_transparent_models[idx], AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, 0, 0); + append_blas(instances, &num_instances, &blas_explosions[idx], AS_INSTANCE_FLAG_DYNAMIC | explosions_primitive_offset, AS_FLAG_EXPLOSIONS, 0, 3); if (cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON) { - append_blas(instances, &num_instances, accel_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV, 0); - append_blas(instances, &num_instances, accel_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, weapon_left_handed ? VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_NV : 0, 0); + append_blas(instances, &num_instances, &blas_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, weapon_left_handed ? 
VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0, 0); } - int particle_num, beam_num, sprite_num; - get_transparency_counts(&particle_num, &beam_num, &sprite_num); - - if (cvar_pt_enable_particles->integer != 0 && particle_num > 0) + if (cvar_pt_enable_particles->integer != 0) { - append_blas(instances, &num_instances, get_transparency_particle_blas(), 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV, 1); + append_blas(instances, &num_instances, &blas_particles[idx], 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 1); } - if (cvar_pt_enable_beams->integer != 0 && beam_num > 0) + if (cvar_pt_enable_beams->integer != 0) { - append_blas(instances, &num_instances, get_transparency_beam_blas(), 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV, 2); + append_blas(instances, &num_instances, &blas_beams[idx], 0, AS_FLAG_PARTICLES, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 2); } - if (cvar_pt_enable_sprites->integer != 0 && sprite_num > 0) + if (cvar_pt_enable_sprites->integer != 0) { - append_blas(instances, &num_instances, get_transparency_sprite_blas(), 0, AS_FLAG_EXPLOSIONS, VK_GEOMETRY_INSTANCE_TRIANGLE_CULL_DISABLE_BIT_NV, 4); + append_blas(instances, &num_instances, &blas_sprites[idx], 0, AS_FLAG_EXPLOSIONS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 4); } - - if(explosions_present) - { - append_blas(instances, &num_instances, accel_explosions[idx], AS_INSTANCE_FLAG_DYNAMIC | explosions_primitive_offset, AS_FLAG_EXPLOSIONS, 0, 3); - } - + void *instance_data = buffer_map(buf_instances + idx); memcpy(instance_data, &instances, sizeof(QvkGeometryInstance_t) * num_instances); buffer_unmap(buf_instances + idx); instance_data = NULL; - VkAccelerationStructureCreateInfoNV accel_create_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV, - .info = { + if (qvk.use_khr_ray_tracing) + { + // Build the TLAS + 
VkAccelerationStructureGeometryDataKHR geometry = { + .instances = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR, + .data = {.deviceAddress = buf_instances[idx].address } + } + }; + assert(buf_instances[idx].address); + + VkAccelerationStructureGeometryKHR topASGeometry = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR, + .geometry = geometry + }; + + // Find size to build on the device + VkAccelerationStructureBuildGeometryInfoKHR buildInfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_BUILD_BIT_KHR, + .geometryCount = 1, + .pGeometries = &topASGeometry, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, + .srcAccelerationStructure = VK_NULL_HANDLE + }; + + VkAccelerationStructureBuildSizesInfoKHR sizeInfo = { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR }; + qvkGetAccelerationStructureBuildSizesKHR(qvk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &buildInfo, &num_instances, &sizeInfo); + assert(sizeInfo.accelerationStructureSize < SIZE_SCRATCH_BUFFER); + + if (accel_top_match[idx].instanceCount < num_instances) { + vkpt_pt_destroy_toplevel(idx); + + // Create the buffer for the acceleration structure + buffer_create(&mem_accel_top[idx], sizeInfo.accelerationStructureSize, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + // Create TLAS + // Create acceleration structure + VkAccelerationStructureCreateInfoKHR createInfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, + .size = sizeInfo.accelerationStructureSize, + .buffer = mem_accel_top[idx].buffer + }; + + // Create 
the acceleration structure + qvkCreateAccelerationStructureKHR(qvk.device, &createInfo, NULL, &accel_top_khr[idx]); + } + + // Update build information + buildInfo.dstAccelerationStructure = accel_top_khr[idx]; + buildInfo.scratchData.deviceAddress = buf_accel_scratch.address; + assert(buf_accel_scratch.address); + + VkAccelerationStructureBuildRangeInfoKHR offset = { .primitiveCount = num_instances }; + + VkAccelerationStructureBuildRangeInfoKHR* offsets = &offset; + + qvkCmdBuildAccelerationStructuresKHR( + cmd_buf, + 1, + &buildInfo, + &offsets); + } + else // (!qvk.use_khr_ray_tracing) + { + VkAccelerationStructureCreateInfoNV accel_create_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV, + .info = { .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, .instanceCount = num_instances, .geometryCount = 0, - .pGeometries = NULL, - .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV + .pGeometries = NULL, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV } - }; + }; - if (accel_top_match[idx].instanceCount < accel_create_info.info.instanceCount) { - vkpt_pt_destroy_toplevel(idx); + if (accel_top_match[idx].instanceCount < accel_create_info.info.instanceCount) + { + vkpt_pt_destroy_toplevel(idx); - qvkCreateAccelerationStructureNV(qvk.device, &accel_create_info, NULL, accel_top + idx); + qvkCreateAccelerationStructureNV(qvk.device, &accel_create_info, NULL, accel_top_nv + idx); - /* XXX: do allocation only once with safety margin */ - VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, - .accelerationStructure = accel_top[idx], - .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV + /* XXX: do allocation only once with safety margin */ + VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, + .accelerationStructure = 
accel_top_nv[idx], + .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV + }; + VkMemoryRequirements2 mem_req = { 0 }; + mem_req.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; + qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &mem_req_info, &mem_req); + + buffer_create(&mem_accel_top[idx], mem_req.memoryRequirements.size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + VkBindAccelerationStructureMemoryInfoNV bind_info = { + .sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV, + .accelerationStructure = accel_top_nv[idx], + .memory = mem_accel_top[idx].memory, + }; + + _VK(qvkBindAccelerationStructureMemoryNV(qvk.device, 1, &bind_info)); + + assert(get_scratch_buffer_size_nv(accel_top_nv[idx]) < SIZE_SCRATCH_BUFFER); + + accel_top_match[idx].instanceCount = accel_create_info.info.instanceCount; + } + + VkAccelerationStructureInfoNV as_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, + .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV, + .geometryCount = 0, + .pGeometries = NULL, + .instanceCount = num_instances, }; - VkMemoryRequirements2 mem_req = { 0 }; - mem_req.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &mem_req_info, &mem_req); - _VK(allocate_gpu_memory(mem_req.memoryRequirements, mem_accel_top + idx)); + // Request the amount of scratch memory, just to make the validation layer happy. + // Our static scratch buffer is definitely big enough. 
- VkBindAccelerationStructureMemoryInfoNV bind_info = { - .sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV, - .accelerationStructure = accel_top[idx], - .memory = mem_accel_top[idx], + VkAccelerationStructureMemoryRequirementsInfoNV mem_req_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, + .accelerationStructure = accel_top_nv[idx], + .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV }; - _VK(qvkBindAccelerationStructureMemoryNV(qvk.device, 1, &bind_info)); + VkMemoryRequirements2 mem_req = { .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2 }; - assert(get_scratch_buffer_size(accel_top[idx]) < SIZE_SCRATCH_BUFFER); + qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &mem_req_info, &mem_req); - accel_top_match[idx].instanceCount = accel_create_info.info.instanceCount; - } + assert(mem_req.memoryRequirements.size <= SIZE_SCRATCH_BUFFER); - VkAccelerationStructureInfoNV as_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_NV, - .geometryCount = 0, - .pGeometries = NULL, - .instanceCount = num_instances, - }; + // Build the TLAS - qvkCmdBuildAccelerationStructureNV( + qvkCmdBuildAccelerationStructureNV( cmd_buf, &as_info, buf_instances[idx].buffer, /* instance buffer */ 0 /* instance offset */, VK_FALSE, /* update */ - accel_top[idx], - VK_NULL_HANDLE, /* source acceleration structure ?? 
*/ + accel_top_nv[idx], + VK_NULL_HANDLE, /* source acceleration structure */ buf_accel_scratch.buffer, 0 /* scratch offset */); + } + MEM_BARRIER_BUILD_ACCEL(cmd_buf); /* probably not needed here but doesn't matter */ return VK_SUCCESS; @@ -990,20 +1016,20 @@ vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world static void setup_rt_pipeline(VkCommandBuffer cmd_buf) { - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline); + + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline_layout, 0, 1, rt_descriptor_set + qvk.current_frame_index, 0, 0); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline_layout, 1, 1, &qvk.desc_set_ubo, 0, 0); VkDescriptorSet desc_set_textures = qvk_get_current_desc_set_textures(); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline_layout, 2, 1, &desc_set_textures, 0, 0); - vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, + vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline_layout, 3, 1, &qvk.desc_set_vertex_buffer, 0, 0); - - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_NV, rt_pipeline); } VkResult @@ -1027,15 +1053,49 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) { set_current_gpu(cmd_buf, i); - int idx = qvk.device_count == 1 ? -1 : i; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_NV, 0, sizeof(int), &idx); + pt_push_constants_t push; + push.gpu_index = qvk.device_count == 1 ? 
-1 : i; + push.bounce = 0; + vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, SBT_RGEN_PRIMARY_RAYS * rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, + if (qvk.use_khr_ray_tracing) + { + assert(buf_shader_binding_table.address); + + VkStridedDeviceAddressRegionKHR raygen = { + .deviceAddress = buf_shader_binding_table.address + SBT_RGEN_PRIMARY_RAYS * shaderGroupBaseAlignment, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR miss_and_hit = { + .deviceAddress = buf_shader_binding_table.address, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR callable = { + .deviceAddress = VK_NULL_HANDLE, + .stride = 0, + .size = 0 + }; + + qvkCmdTraceRaysKHR(cmd_buf, + &raygen, + &miss_and_hit, + &miss_and_hit, + &callable, + qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); + } + else // (!qvk.use_khr_ray_tracing) + { + qvkCmdTraceRaysNV(cmd_buf, + buf_shader_binding_table.buffer, SBT_RGEN_PRIMARY_RAYS * shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, VK_NULL_HANDLE, 0, 0, qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); + } } set_current_gpu(cmd_buf, ALL_GPUS); @@ -1071,18 +1131,51 @@ vkpt_pt_trace_reflections(VkCommandBuffer cmd_buf, int bounce) { set_current_gpu(cmd_buf, i); - int idx = qvk.device_count == 1 ? 
-1 : i; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_NV, 0, sizeof(int), &idx); - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_NV, sizeof(int), sizeof(int), &bounce); + pt_push_constants_t push; + push.gpu_index = qvk.device_count == 1 ? -1 : i; + push.bounce = bounce; + vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); int shader = (bounce == 0) ? SBT_RGEN_REFLECT_REFRACT1 : SBT_RGEN_REFLECT_REFRACT2; - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, shader * rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); + if (qvk.use_khr_ray_tracing) + { + assert(buf_shader_binding_table.address); + + VkStridedDeviceAddressRegionKHR raygen = { + .deviceAddress = buf_shader_binding_table.address + shader * shaderGroupBaseAlignment, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR miss_and_hit = { + .deviceAddress = buf_shader_binding_table.address, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR callable = { + .deviceAddress = VK_NULL_HANDLE, + .stride = 0, + .size = 0 + }; + + qvkCmdTraceRaysKHR(cmd_buf, + &raygen, + &miss_and_hit, + &miss_and_hit, + &callable, + qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 
2 : 1); + } + else // (!qvk.use_khr_ray_tracing) + { + qvkCmdTraceRaysNV(cmd_buf, + buf_shader_binding_table.buffer, shader * shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + VK_NULL_HANDLE, 0, 0, + qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); + } } set_current_gpu(cmd_buf, ALL_GPUS); @@ -1115,19 +1208,53 @@ vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays) { set_current_gpu(cmd_buf, i); - int idx = qvk.device_count == 1 ? -1 : i; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_NV, 0, sizeof(int), &idx); + pt_push_constants_t push; + push.gpu_index = qvk.device_count == 1 ? -1 : i; + push.bounce = 0; + vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); int rgen_index = SBT_RGEN_DIRECT_LIGHTING; if (cvar_pt_caustics->value != 0) rgen_index = SBT_RGEN_DIRECT_LIGHTING_CAUSTICS; - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, rgen_index * rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 
2 : 1); + if (qvk.use_khr_ray_tracing) + { + assert(buf_shader_binding_table.address); + + VkStridedDeviceAddressRegionKHR raygen = { + .deviceAddress = buf_shader_binding_table.address + rgen_index * shaderGroupBaseAlignment, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR miss_and_hit = { + .deviceAddress = buf_shader_binding_table.address, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR callable = { + .deviceAddress = VK_NULL_HANDLE, + .stride = 0, + .size = 0 + }; + + qvkCmdTraceRaysKHR(cmd_buf, + &raygen, + &miss_and_hit, + &miss_and_hit, + &callable, + qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); + } + else // (!qvk.use_khr_ray_tracing) + { + qvkCmdTraceRaysNV(cmd_buf, + buf_shader_binding_table.buffer, rgen_index * shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + VK_NULL_HANDLE, 0, 0, + qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); + } } set_current_gpu(cmd_buf, ALL_GPUS); @@ -1155,9 +1282,11 @@ vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays) { set_current_gpu(cmd_buf, i); - int idx = qvk.device_count == 1 ? -1 : i; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_NV, 0, sizeof(idx), &idx); - + pt_push_constants_t push; + push.gpu_index = qvk.device_count == 1 ? -1 : i; + push.bounce = 0; + vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); + for (int bounce_ray = 0; bounce_ray < (int)ceilf(num_bounce_rays); bounce_ray++) { int height; @@ -1170,12 +1299,44 @@ vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays) ? 
SBT_RGEN_INDIRECT_LIGHTING_FIRST : SBT_RGEN_INDIRECT_LIGHTING_SECOND; - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, rgen_index * rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - buf_shader_binding_table.buffer, 0, rt_properties.shaderGroupHandleSize, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, height, qvk.device_count == 1 ? 2 : 1); + if (qvk.use_khr_ray_tracing) + { + assert(buf_shader_binding_table.address); + + VkStridedDeviceAddressRegionKHR raygen = { + .deviceAddress = buf_shader_binding_table.address + rgen_index * shaderGroupBaseAlignment, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR miss_and_hit = { + .deviceAddress = buf_shader_binding_table.address, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR callable = { + .deviceAddress = VK_NULL_HANDLE, + .stride = 0, + .size = 0 + }; + + qvkCmdTraceRaysKHR(cmd_buf, + &raygen, + &miss_and_hit, + &miss_and_hit, + &callable, + qvk.extent_render.width / 2, height, qvk.device_count == 1 ? 2 : 1); + } + else // (!qvk.use_khr_ray_tracing) + { + qvkCmdTraceRaysNV(cmd_buf, + buf_shader_binding_table.buffer, rgen_index * shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, + VK_NULL_HANDLE, 0, 0, + qvk.extent_render.width / 2, height, qvk.device_count == 1 ? 
2 : 1); + } BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_COLOR_LF_SH]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_COLOR_LF_COCG]); @@ -1196,20 +1357,19 @@ vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays) VkResult vkpt_pt_destroy() { - for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { + for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) + { vkpt_pt_destroy_toplevel(i); buffer_destroy(buf_instances + i); vkpt_pt_destroy_dynamic(i); - vkpt_pt_destroy_transparent_models(i); - vkpt_pt_destroy_viewer_models(i); - vkpt_pt_destroy_viewer_weapon(i); - vkpt_pt_destroy_explosions(i); } + vkpt_pt_destroy_static(); buffer_destroy(&buf_accel_scratch); vkDestroyDescriptorSetLayout(qvk.device, rt_descriptor_set_layout, NULL); vkDestroyPipelineLayout(qvk.device, rt_pipeline_layout, NULL); vkDestroyDescriptorPool(qvk.device, rt_descriptor_pool, NULL); + return VK_SUCCESS; } @@ -1239,168 +1399,343 @@ vkpt_pt_create_pipelines() } }; + uint32_t num_shader_groups = 0; + char* shader_handles = NULL; + VkPipelineShaderStageCreateInfo shader_stages[] = { - SHADER_STAGE(QVK_MOD_PRIMARY_RAYS_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV), - SHADER_STAGE_SPEC(QVK_MOD_REFLECT_REFRACT_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV, &specInfo[0]), - SHADER_STAGE_SPEC(QVK_MOD_REFLECT_REFRACT_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV, &specInfo[1]), - SHADER_STAGE_SPEC(QVK_MOD_DIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV, &specInfo[0]), - SHADER_STAGE_SPEC(QVK_MOD_DIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV, &specInfo[1]), - SHADER_STAGE_SPEC(QVK_MOD_INDIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV, &specInfo[0]), - SHADER_STAGE_SPEC(QVK_MOD_INDIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_NV, &specInfo[1]), - SHADER_STAGE(QVK_MOD_PATH_TRACER_RMISS, VK_SHADER_STAGE_MISS_BIT_NV), - SHADER_STAGE(QVK_MOD_PATH_TRACER_SHADOW_RMISS, VK_SHADER_STAGE_MISS_BIT_NV), - SHADER_STAGE(QVK_MOD_PATH_TRACER_RCHIT, VK_SHADER_STAGE_CLOSEST_HIT_BIT_NV), - 
SHADER_STAGE(QVK_MOD_PATH_TRACER_PARTICLE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_NV), - SHADER_STAGE(QVK_MOD_PATH_TRACER_BEAM_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_NV), - SHADER_STAGE(QVK_MOD_PATH_TRACER_EXPLOSION_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_NV), - SHADER_STAGE(QVK_MOD_PATH_TRACER_SPRITE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_NV), + SHADER_STAGE(QVK_MOD_PRIMARY_RAYS_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR), + SHADER_STAGE_SPEC(QVK_MOD_REFLECT_REFRACT_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[0]), + SHADER_STAGE_SPEC(QVK_MOD_REFLECT_REFRACT_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[1]), + SHADER_STAGE_SPEC(QVK_MOD_DIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[0]), + SHADER_STAGE_SPEC(QVK_MOD_DIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[1]), + SHADER_STAGE_SPEC(QVK_MOD_INDIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[0]), + SHADER_STAGE_SPEC(QVK_MOD_INDIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[1]), + SHADER_STAGE(QVK_MOD_PATH_TRACER_RMISS, VK_SHADER_STAGE_MISS_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_SHADOW_RMISS, VK_SHADER_STAGE_MISS_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_RCHIT, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_PARTICLE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_BEAM_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_EXPLOSION_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), + SHADER_STAGE(QVK_MOD_PATH_TRACER_SPRITE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), }; - VkRayTracingShaderGroupCreateInfoNV rt_shader_group_info[] = { - [SBT_RGEN_PRIMARY_RAYS] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 0, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_REFLECT_REFRACT1] = { - .sType = 
VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 1, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_REFLECT_REFRACT2] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 2, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_DIRECT_LIGHTING] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 3, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_DIRECT_LIGHTING_CAUSTICS] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 4, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_INDIRECT_LIGHTING_FIRST] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 5, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_INDIRECT_LIGHTING_SECOND] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 6, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RMISS_PATH_TRACER] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = 
VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 7, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RMISS_SHADOW] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 8, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RCHIT_OPAQUE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = 9, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_PARTICLE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 10, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_BEAM] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 11, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_EXPLOSION] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 12, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_SPRITE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = 
VK_SHADER_UNUSED_NV, - .anyHitShader = 13, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RCHIT_EMPTY] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - }; + if (qvk.use_khr_ray_tracing) + { + VkRayTracingShaderGroupCreateInfoKHR rt_shader_group_info[] = { + [SBT_RGEN_PRIMARY_RAYS] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 0, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RGEN_REFLECT_REFRACT1] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 1, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RGEN_REFLECT_REFRACT2] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 2, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RGEN_DIRECT_LIGHTING] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 3, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RGEN_DIRECT_LIGHTING_CAUSTICS] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 4, + 
.closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RGEN_INDIRECT_LIGHTING_FIRST] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 5, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RGEN_INDIRECT_LIGHTING_SECOND] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 6, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RMISS_PATH_TRACER] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 7, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RMISS_SHADOW] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 8, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RCHIT_OPAQUE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = 9, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_PARTICLE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 10, + 
.intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_BEAM] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 11, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_EXPLOSION] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 12, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_SPRITE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 13, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RCHIT_EMPTY] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + }; - VkRayTracingPipelineCreateInfoNV rt_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV, - .stageCount = LENGTH(shader_stages), - .pStages = shader_stages, - .groupCount = LENGTH(rt_shader_group_info), - .pGroups = rt_shader_group_info, - .layout = rt_pipeline_layout, - .maxRecursionDepth = 1, - }; + VkPipelineLibraryCreateInfoKHR library_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR }; + VkRayTracingPipelineCreateInfoKHR rt_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, + .pNext = NULL, + .flags = 0, + .stageCount = LENGTH(shader_stages), + 
.pStages = shader_stages, + .groupCount = LENGTH(rt_shader_group_info), + .pGroups = rt_shader_group_info, + .maxPipelineRayRecursionDepth = 1, + .pLibraryInfo = &library_info, + .pLibraryInterface = NULL, + .pDynamicState = NULL, + .layout = rt_pipeline_layout, + .basePipelineHandle = rt_pipeline, + .basePipelineIndex = 0 + }; - _VK(qvkCreateRayTracingPipelinesNV(qvk.device, NULL, 1, &rt_pipeline_info, NULL, &rt_pipeline )); + _VK(qvkCreateRayTracingPipelinesKHR(qvk.device, VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &rt_pipeline_info, NULL, &rt_pipeline)); - uint32_t num_groups = LENGTH(rt_shader_group_info); - uint32_t shader_binding_table_size = rt_properties.shaderGroupHandleSize * num_groups; + num_shader_groups = LENGTH(rt_shader_group_info); - /* pt */ + // get the shader handles in a dense array from VK + uint32_t shader_handle_array_size = num_shader_groups * shaderGroupHandleSize; + shader_handles = alloca(shader_handle_array_size); + _VK(qvkGetRayTracingShaderGroupHandlesKHR(qvk.device, rt_pipeline, 0, num_shader_groups, + shader_handle_array_size, shader_handles)); + + } + else // (!qvk.use_khr_ray_tracing) + { + VkRayTracingShaderGroupCreateInfoNV rt_shader_group_info[] = + { + [SBT_RGEN_PRIMARY_RAYS] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 0, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RGEN_REFLECT_REFRACT1] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 1, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RGEN_REFLECT_REFRACT2] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 
2, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RGEN_DIRECT_LIGHTING] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 3, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RGEN_DIRECT_LIGHTING_CAUSTICS] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 4, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RGEN_INDIRECT_LIGHTING_FIRST] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 5, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RGEN_INDIRECT_LIGHTING_SECOND] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 6, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RMISS_PATH_TRACER] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 7, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RMISS_SHADOW] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 8, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = 
VK_SHADER_UNUSED_NV + }, + [SBT_RCHIT_OPAQUE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = 9, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_PARTICLE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 10, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_BEAM] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 11, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_EXPLOSION] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 12, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_SPRITE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 13, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RCHIT_EMPTY] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + }; + + VkRayTracingPipelineCreateInfoNV rt_pipeline_info = 
{ + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV, + .stageCount = LENGTH(shader_stages), + .pStages = shader_stages, + .groupCount = LENGTH(rt_shader_group_info), + .pGroups = rt_shader_group_info, + .layout = rt_pipeline_layout, + .maxRecursionDepth = 1, + }; + + _VK(qvkCreateRayTracingPipelinesNV(qvk.device, NULL, 1, &rt_pipeline_info, NULL, &rt_pipeline)); + + num_shader_groups = LENGTH(rt_shader_group_info); + + // get the shader handles in a dense array from VK + uint32_t shader_handle_array_size = num_shader_groups * shaderGroupHandleSize; + shader_handles = alloca(shader_handle_array_size); + _VK(qvkGetRayTracingShaderGroupHandlesNV(qvk.device, rt_pipeline, 0, num_shader_groups, + shader_handle_array_size, shader_handles)); + } + + // create the SBT buffer + uint32_t shader_binding_table_size = shaderGroupBaseAlignment * num_shader_groups; _VK(buffer_create(&buf_shader_binding_table, shader_binding_table_size, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); + VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); - void *shader_binding_table = buffer_map(&buf_shader_binding_table); - _VK(qvkGetRayTracingShaderGroupHandlesNV(qvk.device, rt_pipeline, 0, num_groups, - shader_binding_table_size, shader_binding_table)); + // copy/unpack the shader handles into the SBT: + // shaderGroupBaseAlignment is likely greater than shaderGroupHandleSize (64 vs 32 on NV) + char* shader_binding_table = (char*)buffer_map(&buf_shader_binding_table); + for (uint32_t group = 0; group < num_shader_groups; group++) + { + memcpy( + shader_binding_table + group * shaderGroupBaseAlignment, + shader_handles + group * shaderGroupHandleSize, + shaderGroupHandleSize); + } buffer_unmap(&buf_shader_binding_table); shader_binding_table = NULL; diff --git a/src/refresh/vkpt/precomputed_sky.c b/src/refresh/vkpt/precomputed_sky.c index 
18319dbc3..fab9cff58 100644 --- a/src/refresh/vkpt/precomputed_sky.c +++ b/src/refresh/vkpt/precomputed_sky.c @@ -161,9 +161,9 @@ VkResult UploadImage(void* FirstPixel, size_t total_size, unsigned int Width, un }; #ifdef VKPT_DEVICE_GROUPS - VkMemoryAllocateFlagsInfoKHR mem_alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR, - .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR, + VkMemoryAllocateFlagsInfo mem_alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, .deviceMask = (1 << qvk.device_count) - 1 }; diff --git a/src/refresh/vkpt/shader/direct_lighting.rgen b/src/refresh/vkpt/shader/direct_lighting.rgen index b52edd4cf..25acf1476 100644 --- a/src/refresh/vkpt/shader/direct_lighting.rgen +++ b/src/refresh/vkpt/shader/direct_lighting.rgen @@ -119,11 +119,11 @@ direct_lighting(ivec2 ipos, bool is_odd_checkerboard, out vec3 high_freq, out ve void main() { - ivec2 ipos = ivec2(gl_LaunchIDNV.xy); - if(gl_LaunchIDNV.z != 0) + ivec2 ipos = ivec2(rt_LaunchID.xy); + if(rt_LaunchID.z != 0) ipos.x += global_ubo.width / 2; - bool is_odd_checkerboard = (gl_LaunchIDNV.z != 0) || (push_constants.gpu_index == 1); + bool is_odd_checkerboard = (rt_LaunchID.z != 0) || (push_constants.gpu_index == 1); vec3 high_freq, specular; direct_lighting(ipos, is_odd_checkerboard, high_freq, specular); diff --git a/src/refresh/vkpt/shader/indirect_lighting.rgen b/src/refresh/vkpt/shader/indirect_lighting.rgen index 9f8d11804..2f23f72e7 100644 --- a/src/refresh/vkpt/shader/indirect_lighting.rgen +++ b/src/refresh/vkpt/shader/indirect_lighting.rgen @@ -411,8 +411,8 @@ indirect_lighting( void main() { - ivec2 ipos = ivec2(gl_LaunchIDNV.xy); - if(gl_LaunchIDNV.z != 0) + ivec2 ipos = ivec2(rt_LaunchID.xy); + if(rt_LaunchID.z != 0) ipos.x += global_ubo.width / 2; // Half-resolution tracing happens in the "low" GI setting diff --git a/src/refresh/vkpt/shader/path_tracer.h b/src/refresh/vkpt/shader/path_tracer.h 
index 9909999d2..a6f3f34d4 100644 --- a/src/refresh/vkpt/shader/path_tracer.h +++ b/src/refresh/vkpt/shader/path_tracer.h @@ -117,7 +117,36 @@ Converting skyboxes to local lights provides two benefits: // ========================================================================== // */ +#ifdef NV_RAY_TRACING + #extension GL_NV_ray_tracing : require +#define rt_accelerationStructure accelerationStructureNV +#define rt_hitAttribute hitAttributeNV +#define rt_HitT gl_HitTNV +#define rt_ignoreIntersection ignoreIntersectionNV() +#define rt_InstanceCustomIndex gl_InstanceCustomIndexNV +#define rt_LaunchID gl_LaunchIDNV +#define rt_rayPayload rayPayloadNV +#define rt_rayPayloadIn rayPayloadInNV +#define rt_traceRay traceNV +#define rt_WorldRayDirection gl_WorldRayDirectionNV + +#else + +#extension GL_EXT_ray_tracing : require +#define rt_accelerationStructure accelerationStructureEXT +#define rt_hitAttribute hitAttributeEXT +#define rt_HitT gl_HitTEXT +#define rt_ignoreIntersection ignoreIntersectionEXT +#define rt_InstanceCustomIndex gl_InstanceCustomIndexEXT +#define rt_LaunchID gl_LaunchIDEXT +#define rt_rayPayload rayPayloadEXT +#define rt_rayPayloadIn rayPayloadInEXT +#define rt_traceRay traceRayEXT +#define rt_WorldRayDirection gl_WorldRayDirectionEXT + +#endif + #extension GL_ARB_separate_shader_objects : enable #extension GL_EXT_nonuniform_qualifier : enable diff --git a/src/refresh/vkpt/shader/path_tracer.rchit b/src/refresh/vkpt/shader/path_tracer.rchit index 7519ac577..8a4dd68ac 100644 --- a/src/refresh/vkpt/shader/path_tracer.rchit +++ b/src/refresh/vkpt/shader/path_tracer.rchit @@ -22,22 +22,22 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "path_tracer.h" -rayPayloadInNV RayPayload ray_payload; +rt_rayPayloadIn RayPayload ray_payload; -hitAttributeNV vec3 hit_attribs; +rt_hitAttribute vec3 hit_attribs; void main() { ray_payload.barycentric = hit_attribs.xy; - ray_payload.instance_prim = gl_PrimitiveID + 
gl_InstanceCustomIndexNV & AS_INSTANCE_MASK_OFFSET; - if((gl_InstanceCustomIndexNV & AS_INSTANCE_FLAG_DYNAMIC) != 0) + ray_payload.instance_prim = gl_PrimitiveID + rt_InstanceCustomIndex & AS_INSTANCE_MASK_OFFSET; + if((rt_InstanceCustomIndex & AS_INSTANCE_FLAG_DYNAMIC) != 0) { ray_payload.instance_prim |= INSTANCE_DYNAMIC_FLAG; } - if((gl_InstanceCustomIndexNV & AS_INSTANCE_FLAG_SKY) != 0) + if((rt_InstanceCustomIndex & AS_INSTANCE_FLAG_SKY) != 0) { ray_payload.instance_prim |= INSTANCE_SKY_FLAG; } - ray_payload.hit_distance = gl_HitTNV; + ray_payload.hit_distance = rt_HitT; } diff --git a/src/refresh/vkpt/shader/path_tracer.rmiss b/src/refresh/vkpt/shader/path_tracer.rmiss index 42f4e3282..4ad5e45ff 100644 --- a/src/refresh/vkpt/shader/path_tracer.rmiss +++ b/src/refresh/vkpt/shader/path_tracer.rmiss @@ -22,7 +22,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "path_tracer.h" -rayPayloadInNV RayPayload ray_payload; +rt_rayPayloadIn RayPayload ray_payload; void main() diff --git a/src/refresh/vkpt/shader/path_tracer_beam.rahit b/src/refresh/vkpt/shader/path_tracer_beam.rahit index c333abf80..b59d3c0e5 100644 --- a/src/refresh/vkpt/shader/path_tracer_beam.rahit +++ b/src/refresh/vkpt/shader/path_tracer_beam.rahit @@ -32,8 +32,8 @@ with this program; if not, write to the Free Software Foundation, Inc., layout(set = 0, binding = 2) uniform textureBuffer beam_color_buffer; -rayPayloadInNV RayPayload ray_payload; -hitAttributeNV vec2 hit_attribs; +rt_rayPayloadIn RayPayload ray_payload; +rt_hitAttribute vec2 hit_attribs; void main() { @@ -51,19 +51,19 @@ void main() color.rgb *= global_ubo.prev_adapted_luminance * 20; int texnum = global_ubo.current_frame_idx & (NUM_BLUE_NOISE_TEX - 1); - ivec2 texpos = ivec2(gl_LaunchIDNV.xy) & ivec2(BLUE_NOISE_RES - 1); + ivec2 texpos = ivec2(rt_LaunchID.xy) & ivec2(BLUE_NOISE_RES - 1); float noise = texelFetch(TEX_BLUE_NOISE, ivec3(texpos, texnum), 0).r; color.rgb *= noise * noise + 0.1; - 
if(ray_payload.max_transparent_distance < gl_HitTNV) + if(ray_payload.max_transparent_distance < rt_HitT) { ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), color)); - ray_payload.max_transparent_distance = gl_HitTNV; + ray_payload.max_transparent_distance = rt_HitT; } else ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(ray_payload.transparency))); } - ignoreIntersectionNV(); + rt_ignoreIntersection; } diff --git a/src/refresh/vkpt/shader/path_tracer_explosion.rahit b/src/refresh/vkpt/shader/path_tracer_explosion.rahit index 097c6654f..835d4cb29 100644 --- a/src/refresh/vkpt/shader/path_tracer_explosion.rahit +++ b/src/refresh/vkpt/shader/path_tracer_explosion.rahit @@ -29,12 +29,12 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_READONLY 1 #include "vertex_buffer.h" -rayPayloadInNV RayPayload ray_payload; -hitAttributeNV vec2 hit_attribs; +rt_rayPayloadIn RayPayload ray_payload; +rt_hitAttribute vec2 hit_attribs; void main() { - const uint primitive_id = gl_PrimitiveID + gl_InstanceCustomIndexNV & AS_INSTANCE_MASK_OFFSET; + const uint primitive_id = gl_PrimitiveID + rt_InstanceCustomIndex & AS_INSTANCE_MASK_OFFSET; const Triangle triangle = get_instanced_triangle(primitive_id); const vec3 barycentric = vec3(1.0 - hit_attribs.x - hit_attribs.y, hit_attribs.x, hit_attribs.y); @@ -46,7 +46,7 @@ void main() if((triangle.material_id & MATERIAL_KIND_MASK) == MATERIAL_KIND_EXPLOSION) { const vec3 normal = triangle.normals * barycentric; - emission.rgb = mix(emission.rgb, get_explosion_color(normal, gl_WorldRayDirectionNV.xyz), triangle.alpha); + emission.rgb = mix(emission.rgb, get_explosion_color(normal, rt_WorldRayDirection.xyz), triangle.alpha); emission.rgb *= global_ubo.pt_explosion_brightness; } @@ -55,14 +55,14 @@ void main() emission.rgb *= global_ubo.prev_adapted_luminance * 500; - if(ray_payload.max_transparent_distance < 
gl_HitTNV) + if(ray_payload.max_transparent_distance < rt_HitT) { ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), emission)); - ray_payload.max_transparent_distance = gl_HitTNV; + ray_payload.max_transparent_distance = rt_HitT; } else ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(emission, unpackHalf4x16(ray_payload.transparency))); - ignoreIntersectionNV(); + rt_ignoreIntersection; } diff --git a/src/refresh/vkpt/shader/path_tracer_particle.rahit b/src/refresh/vkpt/shader/path_tracer_particle.rahit index bc86263a7..84d72e1b6 100644 --- a/src/refresh/vkpt/shader/path_tracer_particle.rahit +++ b/src/refresh/vkpt/shader/path_tracer_particle.rahit @@ -29,8 +29,8 @@ with this program; if not, write to the Free Software Foundation, Inc., layout(set = 0, binding = 1) uniform textureBuffer particle_color_buffer; -rayPayloadInNV RayPayload ray_payload; -hitAttributeNV vec2 hit_attribs; +rt_rayPayloadIn RayPayload ray_payload; +rt_hitAttribute vec2 hit_attribs; void main() { @@ -48,15 +48,15 @@ void main() color.rgb *= global_ubo.prev_adapted_luminance * 500; - if(ray_payload.max_transparent_distance < gl_HitTNV) + if(ray_payload.max_transparent_distance < rt_HitT) { ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), color)); - ray_payload.max_transparent_distance = gl_HitTNV; + ray_payload.max_transparent_distance = rt_HitT; } else ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(ray_payload.transparency))); } - ignoreIntersectionNV(); + rt_ignoreIntersection; } diff --git a/src/refresh/vkpt/shader/path_tracer_rgen.h b/src/refresh/vkpt/shader/path_tracer_rgen.h index 243677957..00bbe1ea7 100644 --- a/src/refresh/vkpt/shader/path_tracer_rgen.h +++ b/src/refresh/vkpt/shader/path_tracer_rgen.h @@ -22,7 +22,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define 
RAY_GEN_DESCRIPTOR_SET_IDX 0 layout(set = RAY_GEN_DESCRIPTOR_SET_IDX, binding = 0) -uniform accelerationStructureNV topLevelAS; +uniform rt_accelerationStructure topLevelAS; #define GLOBAL_TEXTURES_DESC_SET_IDX 2 #include "global_textures.h" @@ -67,8 +67,8 @@ uniform accelerationStructureNV topLevelAS; #define RT_PAYLOAD_SHADOW 0 #define RT_PAYLOAD_BRDF 1 -layout(location = RT_PAYLOAD_SHADOW) rayPayloadNV RayPayloadShadow ray_payload_shadow; -layout(location = RT_PAYLOAD_BRDF) rayPayloadNV RayPayload ray_payload_brdf; +layout(location = RT_PAYLOAD_SHADOW) rt_rayPayload RayPayloadShadow ray_payload_shadow; +layout(location = RT_PAYLOAD_BRDF) rt_rayPayload RayPayload ray_payload_brdf; uint rng_seed; @@ -111,17 +111,17 @@ ivec2 get_image_position() { ivec2 pos; - bool is_even_checkerboard = push_constants.gpu_index == 0 || push_constants.gpu_index < 0 && gl_LaunchIDNV.z == 0; + bool is_even_checkerboard = push_constants.gpu_index == 0 || push_constants.gpu_index < 0 && rt_LaunchID.z == 0; if(global_ubo.pt_swap_checkerboard != 0) is_even_checkerboard = !is_even_checkerboard; if (is_even_checkerboard) { - pos.x = int(gl_LaunchIDNV.x * 2) + int(gl_LaunchIDNV.y & 1); + pos.x = int(rt_LaunchID.x * 2) + int(rt_LaunchID.y & 1); } else { - pos.x = int(gl_LaunchIDNV.x * 2 + 1) - int(gl_LaunchIDNV.y & 1); + pos.x = int(rt_LaunchID.x * 2 + 1) - int(rt_LaunchID.y & 1); } - pos.y = int(gl_LaunchIDNV.y); + pos.y = int(rt_LaunchID.y); return pos; } @@ -250,13 +250,13 @@ trace_ray(Ray ray, bool cull_back_faces, int instance_mask) { uint rayFlags = 0; if(cull_back_faces) - rayFlags |=gl_RayFlagsCullBackFacingTrianglesNV; + rayFlags |=gl_RayFlagsCullBackFacingTrianglesEXT; ray_payload_brdf.transparency = uvec2(0); ray_payload_brdf.hit_distance = 0; ray_payload_brdf.max_transparent_distance = 0; - traceNV( topLevelAS, rayFlags, instance_mask, + rt_traceRay( topLevelAS, rayFlags, instance_mask, SBT_RCHIT_OPAQUE /*sbtRecordOffset*/, 0 /*sbtRecordStride*/, SBT_RMISS_PATH_TRACER 
/*missIndex*/, ray.origin, ray.t_min, ray.direction, ray.t_max, RT_PAYLOAD_BRDF); } @@ -279,11 +279,11 @@ Ray get_shadow_ray(vec3 p1, vec3 p2, float tmin) float trace_shadow_ray(Ray ray, int cull_mask) { - const uint rayFlags = gl_RayFlagsOpaqueNV | gl_RayFlagsTerminateOnFirstHitNV; + const uint rayFlags = gl_RayFlagsOpaqueEXT | gl_RayFlagsTerminateOnFirstHitEXT; ray_payload_shadow.missed = 0; - traceNV( topLevelAS, rayFlags, cull_mask, + rt_traceRay( topLevelAS, rayFlags, cull_mask, SBT_RCHIT_EMPTY /*sbtRecordOffset*/, 0 /*sbtRecordStride*/, SBT_RMISS_SHADOW /*missIndex*/, ray.origin, ray.t_min, ray.direction, ray.t_max, RT_PAYLOAD_SHADOW); @@ -295,7 +295,7 @@ trace_caustic_ray(Ray ray, int surface_medium) { ray_payload_brdf.hit_distance = -1; - traceNV(topLevelAS, gl_RayFlagsCullBackFacingTrianglesNV, AS_FLAG_TRANSPARENT, + rt_traceRay(topLevelAS, gl_RayFlagsCullBackFacingTrianglesEXT, AS_FLAG_TRANSPARENT, SBT_RCHIT_OPAQUE, 0, SBT_RMISS_PATH_TRACER, ray.origin, ray.t_min, ray.direction, ray.t_max, RT_PAYLOAD_BRDF); diff --git a/src/refresh/vkpt/shader/path_tracer_shadow.rmiss b/src/refresh/vkpt/shader/path_tracer_shadow.rmiss index 5c717441e..46e3e9acd 100644 --- a/src/refresh/vkpt/shader/path_tracer_shadow.rmiss +++ b/src/refresh/vkpt/shader/path_tracer_shadow.rmiss @@ -21,7 +21,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "path_tracer.h" -layout(location = 1) rayPayloadInNV RayPayloadShadow ray_payload_shadow; +layout(location = 1) rt_rayPayloadIn RayPayloadShadow ray_payload_shadow; void main() diff --git a/src/refresh/vkpt/shader/path_tracer_sprite.rahit b/src/refresh/vkpt/shader/path_tracer_sprite.rahit index 6997d37b2..2929634d2 100644 --- a/src/refresh/vkpt/shader/path_tracer_sprite.rahit +++ b/src/refresh/vkpt/shader/path_tracer_sprite.rahit @@ -32,8 +32,8 @@ uniform utextureBuffer sprite_texure_buffer; #define VERTEX_READONLY 1 #include "vertex_buffer.h" -rayPayloadInNV RayPayload ray_payload; -hitAttributeNV vec2 
hit_attribs; +rt_rayPayloadIn RayPayload ray_payload; +rt_hitAttribute vec2 hit_attribs; void main() { @@ -63,12 +63,12 @@ void main() color.rgb *= global_ubo.prev_adapted_luminance * 2000; } - if(ray_payload.max_transparent_distance < gl_HitTNV) + if(ray_payload.max_transparent_distance < rt_HitT) ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(unpackHalf4x16(ray_payload.transparency), color)); else ray_payload.transparency = packHalf4x16(alpha_blend_premultiplied(color, unpackHalf4x16(ray_payload.transparency))); - ray_payload.max_transparent_distance = gl_HitTNV; + ray_payload.max_transparent_distance = rt_HitT; - ignoreIntersectionNV(); + rt_ignoreIntersection; } diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index 6dac7ca14..a798c5f5d 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -111,11 +111,11 @@ void generate_rng_seed(ivec2 ipos, bool is_odd_checkerboard) void main() { - ivec2 ipos = ivec2(gl_LaunchIDNV.xy); - if(gl_LaunchIDNV.z != 0) + ivec2 ipos = ivec2(rt_LaunchID.xy); + if(rt_LaunchID.z != 0) ipos.x += global_ubo.width / 2; - bool is_odd_checkerboard = (gl_LaunchIDNV.z != 0) || (push_constants.gpu_index == 1); + bool is_odd_checkerboard = (rt_LaunchID.z != 0) || (push_constants.gpu_index == 1); generate_rng_seed(ipos, is_odd_checkerboard); diff --git a/src/refresh/vkpt/shader/reflect_refract.rgen b/src/refresh/vkpt/shader/reflect_refract.rgen index 99cff6464..138e3f7b5 100644 --- a/src/refresh/vkpt/shader/reflect_refract.rgen +++ b/src/refresh/vkpt/shader/reflect_refract.rgen @@ -62,8 +62,8 @@ get_primary_ray(vec2 screen_pos) void main() { - ivec2 ipos = ivec2(gl_LaunchIDNV.xy); - if(gl_LaunchIDNV.z != 0) + ivec2 ipos = ivec2(rt_LaunchID.xy); + if(rt_LaunchID.z != 0) ipos.x += global_ubo.width / 2; vec4 position_material = imageLoad(IMG_PT_SHADING_POSITION, ipos); @@ -88,7 +88,7 @@ main() primary_is_transparent )) return; - 
bool is_odd_checkerboard = (gl_LaunchIDNV.z != 0) || (push_constants.gpu_index == 1); + bool is_odd_checkerboard = (rt_LaunchID.z != 0) || (push_constants.gpu_index == 1); rng_seed = texelFetch(TEX_ASVGF_RNG_SEED_A, ipos, 0).r; diff --git a/src/refresh/vkpt/textures.c b/src/refresh/vkpt/textures.c index b94df6e7a..0f4169871 100644 --- a/src/refresh/vkpt/textures.c +++ b/src/refresh/vkpt/textures.c @@ -646,7 +646,7 @@ IMG_Unload_RTX(image_t *image) if (tex_images[index]) { - const uint32_t frame_index = (qvk.frame_counter + MAX_FRAMES_IN_FLIGHT) % DESTROY_LATENCY; + const uint32_t frame_index = (qvk.frame_counter + MAX_FRAMES_IN_FLIGHT + 1) % DESTROY_LATENCY; UnusedResources* unused_resources = texture_system.unused_resources + frame_index; const uint32_t unused_index = unused_resources->image_num++; @@ -1077,9 +1077,9 @@ vkpt_textures_destroy() } #ifdef VKPT_DEVICE_GROUPS -static VkMemoryAllocateFlagsInfoKHR mem_alloc_flags_broadcast = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR, - .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR, +static VkMemoryAllocateFlagsInfo mem_alloc_flags_broadcast = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, }; #endif @@ -1521,8 +1521,8 @@ LIST_IMAGES_A_B vkGetImageMemoryRequirements(qvk.device, qvk.images_local[d][i], &mem_req); } - VkBindImageMemoryDeviceGroupInfoKHR device_group_info = { - .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_DEVICE_GROUP_INFO_KHR, + VkBindImageMemoryDeviceGroupInfo device_group_info = { + .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_DEVICE_GROUP_INFO, .pNext = NULL, .deviceIndexCount = qvk.device_count, .pDeviceIndices = device_indices, @@ -1530,15 +1530,15 @@ LIST_IMAGES_A_B .pSplitInstanceBindRegions = NULL, }; - VkBindImageMemoryInfoKHR bind_info = { - .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO_KHR, + VkBindImageMemoryInfo bind_info = { + .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO, .pNext = &device_group_info, 
.image = qvk.images_local[d][i], .memory = mem_images[i], .memoryOffset = 0, }; - _VK(qvkBindImageMemory2KHR(qvk.device, 1, &bind_info)); + _VK(vkBindImageMemory2(qvk.device, 1, &bind_info)); } } #endif @@ -1626,7 +1626,7 @@ LIST_IMAGES_A_B [VKPT_IMG_##_name] = { \ .imageLayout = VK_IMAGE_LAYOUT_GENERAL, \ .imageView = qvk.images_views[VKPT_IMG_##_name], \ - .sampler = qvk.tex_sampler, \ + .sampler = qvk.tex_sampler_nearest, \ }, LIST_IMAGES @@ -1637,6 +1637,8 @@ LIST_IMAGES_A_B for(int i = VKPT_IMG_BLOOM_HBLUR; i <= VKPT_IMG_BLOOM_VBLUR; i++) { img_info[i].sampler = qvk.tex_sampler_linear_clamp; } + img_info[VKPT_IMG_ASVGF_TAA_A].sampler = qvk.tex_sampler; + img_info[VKPT_IMG_ASVGF_TAA_B].sampler = qvk.tex_sampler; VkWriteDescriptorSet output_img_write[NUM_IMAGES * 2]; diff --git a/src/refresh/vkpt/transparency.c b/src/refresh/vkpt/transparency.c index 01b256dea..1d01b7613 100644 --- a/src/refresh/vkpt/transparency.c +++ b/src/refresh/vkpt/transparency.c @@ -16,6 +16,7 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
*/ +#include #include "shared/shared.h" #include "vkpt.h" #include "vk_util.h" @@ -29,9 +30,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #define TR_COLOR_SIZE 4 * sizeof(float) #define TR_SPRITE_INFO_SIZE 2 * sizeof(float) -#define TR_BLAS_BUILD_FLAGS VK_BUILD_ACCELERATION_STRUCTURE_ALLOW_UPDATE_BIT_NV | \ - VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_NV - struct { size_t vertex_position_host_offset; @@ -39,10 +37,8 @@ struct size_t beam_color_host_offset; size_t sprite_info_host_offset; - size_t beam_scratch_device_offset; size_t beam_vertex_device_offset; - size_t sprite_scratch_device_offset; size_t sprite_vertex_device_offset; size_t host_buffer_size; @@ -50,35 +46,24 @@ struct unsigned int particle_num; unsigned int beam_num; unsigned int sprite_num; - unsigned int blas_beam_num; - unsigned int blas_particle_num; - unsigned int blas_sprite_num; unsigned int host_frame_index; unsigned int host_buffered_frame_num; char* mapped_host_buffer; - VkAccelerationStructureNV particle_blas; - VkAccelerationStructureNV beam_blas; - VkAccelerationStructureNV sprite_blas; - VkBuffer host_buffer; - VkBuffer scratch_buffer; - VkBuffer vertex_buffer; - VkBuffer index_buffer; - VkBuffer particle_color_buffer; - VkBuffer beam_color_buffer; - VkBuffer sprite_info_buffer; - VkDeviceMemory host_buffer_memory; - VkDeviceMemory device_buffer_memory; - VkDeviceMemory device_blas_memory; + BufferResource_t vertex_buffer; + BufferResource_t index_buffer; + BufferResource_t particle_color_buffer; + BufferResource_t beam_color_buffer; + BufferResource_t sprite_info_buffer; VkBufferView particle_color_buffer_view; VkBufferView beam_color_buffer_view; VkBufferView sprite_info_buffer_view; + VkBuffer host_buffer; + VkDeviceMemory host_buffer_memory; VkBufferMemoryBarrier transfer_barriers[4]; } transparency; // initialization static void create_buffers(); -static void create_blas(); -static void allocate_and_bind_memory_to_blas(); static qboolean 
allocate_and_bind_memory_to_buffers(); static void create_buffer_views(); static void fill_index_buffer(); @@ -88,9 +73,6 @@ static void write_particle_geometry(const float* view_matrix, const particle_t* static void write_beam_geometry(const float* view_matrix, const entity_t* entities, int entity_num); static void write_sprite_geometry(const float* view_matrix, const entity_t* entities, int entity_num); static void upload_geometry(VkCommandBuffer command_buffer); -static void update_particle_blas(VkCommandBuffer command_buffer); -static void update_beam_blas(VkCommandBuffer command_buffer); -static void update_sprite_blas(VkCommandBuffer command_buffer); cvar_t* cvar_pt_particle_size = NULL; cvar_t* cvar_pt_beam_width = NULL; @@ -114,8 +96,8 @@ void cast_u32_to_f32_color(int color_index, const color_t* pcolor, float* color_ qboolean initialize_transparency() { cvar_pt_particle_size = Cvar_Get("pt_particle_size", "0.35", 0); - cvar_pt_beam_width = Cvar_Get("pt_beam_width", "1.0", 0); - cvar_pt_beam_lights = Cvar_Get("pt_beam_lights", "1.0", 0); + cvar_pt_beam_width = Cvar_Get("pt_beam_width", "1.0", 0); + cvar_pt_beam_lights = Cvar_Get("pt_beam_lights", "1.0", 0); memset(&transparency, 0, sizeof(transparency)); @@ -135,14 +117,12 @@ qboolean initialize_transparency() transparency.host_frame_size = particle_data_size + beam_data_size + sprite_data_size; transparency.host_buffer_size = transparency.host_buffered_frame_num * transparency.host_frame_size; - create_blas(transparency); - create_buffers(transparency); + create_buffers(); - if (allocate_and_bind_memory_to_buffers(transparency) != VK_TRUE) + if (allocate_and_bind_memory_to_buffers() != VK_TRUE) return qfalse; create_buffer_views(transparency); - allocate_and_bind_memory_to_blas(transparency); fill_index_buffer(transparency); return qtrue; @@ -153,19 +133,14 @@ void destroy_transparency() vkDestroyBufferView(qvk.device, transparency.particle_color_buffer_view, NULL); vkDestroyBufferView(qvk.device, 
transparency.beam_color_buffer_view, NULL); vkDestroyBufferView(qvk.device, transparency.sprite_info_buffer_view, NULL); + buffer_destroy(&transparency.vertex_buffer); + buffer_destroy(&transparency.index_buffer); + buffer_destroy(&transparency.particle_color_buffer); + buffer_destroy(&transparency.beam_color_buffer); + buffer_destroy(&transparency.sprite_info_buffer); + vkDestroyBuffer(qvk.device, transparency.host_buffer, NULL); - vkDestroyBuffer(qvk.device, transparency.scratch_buffer, NULL); - vkDestroyBuffer(qvk.device, transparency.vertex_buffer, NULL); - vkDestroyBuffer(qvk.device, transparency.index_buffer, NULL); - vkDestroyBuffer(qvk.device, transparency.particle_color_buffer, NULL); - vkDestroyBuffer(qvk.device, transparency.beam_color_buffer, NULL); - vkDestroyBuffer(qvk.device, transparency.sprite_info_buffer, NULL); - qvkDestroyAccelerationStructureNV(qvk.device, transparency.particle_blas, NULL); - qvkDestroyAccelerationStructureNV(qvk.device, transparency.beam_blas, NULL); - qvkDestroyAccelerationStructureNV(qvk.device, transparency.sprite_blas, NULL); vkFreeMemory(qvk.device, transparency.host_buffer_memory, NULL); - vkFreeMemory(qvk.device, transparency.device_buffer_memory, NULL); - vkFreeMemory(qvk.device, transparency.device_blas_memory, NULL); } void update_transparency(VkCommandBuffer command_buffer, const float* view_matrix, @@ -214,28 +189,45 @@ void update_transparency(VkCommandBuffer command_buffer, const float* view_matri } } -void build_transparency_blas(VkCommandBuffer cmd_buf) +void vkpt_get_transparency_buffers( + vkpt_transparency_t ttype, + BufferResource_t** vertex_buffer, + uint64_t* vertex_offset, + BufferResource_t** index_buffer, + uint64_t* index_offset, + uint32_t* num_vertices, + uint32_t* num_indices) { - update_particle_blas(cmd_buf); - update_beam_blas(cmd_buf); - update_sprite_blas(cmd_buf); + *vertex_buffer = &transparency.vertex_buffer; + *index_buffer = &transparency.index_buffer; + *index_offset = 0; - // No barrier 
here because a single barrier is used later in the pipeline, after building all the BLAS-es -} + switch (ttype) + { + case VKPT_TRANSPARENCY_PARTICLES: + *vertex_offset = 0; + *num_vertices = transparency.particle_num * 4; + *num_indices = transparency.particle_num * 6; + return; -VkAccelerationStructureNV get_transparency_particle_blas() -{ - return transparency.particle_blas; -} + case VKPT_TRANSPARENCY_BEAMS: + *vertex_offset = transparency.beam_vertex_device_offset; + *num_vertices = transparency.beam_num * 4; + *num_indices = transparency.beam_num * 6; + return; -VkAccelerationStructureNV get_transparency_beam_blas() -{ - return transparency.beam_blas; -} + case VKPT_TRANSPARENCY_SPRITES: + *vertex_offset = transparency.sprite_vertex_device_offset; + *num_vertices = transparency.sprite_num * 4; + *num_indices = transparency.sprite_num * 6; + return; -VkAccelerationStructureNV get_transparency_sprite_blas() -{ - return transparency.sprite_blas; + default: + *vertex_offset = transparency.sprite_vertex_device_offset; + *num_vertices = 0; + *num_indices = 0; + return; + } } VkBufferView get_transparency_particle_color_buffer_view() @@ -325,7 +317,7 @@ static void write_particle_geometry(const float* view_matrix, const particle_t* static void write_beam_geometry(const float* view_matrix, const entity_t* entities, int entity_num) { const float beam_width = cvar_pt_beam_width->value; - const float hdr_factor = cvar_pt_particle_emissive->value; + const float hdr_factor = cvar_pt_particle_emissive->value; const vec3_t view_y = { view_matrix[1], view_matrix[5], view_matrix[9] }; @@ -491,11 +483,11 @@ qboolean vkpt_build_cylinder_light(light_poly_t* light_list, int* num_lights, in void vkpt_build_beam_lights(light_poly_t* light_list, int* num_lights, int max_lights, bsp_t *bsp, entity_t* entities, int num_entites, float adapted_luminance) { - const float beam_width = cvar_pt_beam_width->value; - const float hdr_factor = cvar_pt_beam_lights->value * adapted_luminance * 
20.f; + const float beam_width = cvar_pt_beam_width->value; + const float hdr_factor = cvar_pt_beam_lights->value * adapted_luminance * 20.f; - if (hdr_factor <= 0.f) - return; + if (hdr_factor <= 0.f) + return; int num_beams = 0; @@ -515,32 +507,32 @@ void vkpt_build_beam_lights(light_poly_t* light_list, int* num_lights, int max_l qsort(beams, num_beams, sizeof(entity_t*), compare_beams); - for (int i = 0; i < num_beams; i++) - { - if (*num_lights >= max_lights) - return; + for (int i = 0; i < num_beams; i++) + { + if (*num_lights >= max_lights) + return; const entity_t* beam = beams[i]; - vec3_t begin; - vec3_t end; - VectorCopy(beam->oldorigin, begin); - VectorCopy(beam->origin, end); + vec3_t begin; + vec3_t end; + VectorCopy(beam->oldorigin, begin); + VectorCopy(beam->origin, end); - vec3_t to_end; - VectorSubtract(end, begin, to_end); + vec3_t to_end; + VectorSubtract(end, begin, to_end); - vec3_t norm_dir; - VectorCopy(to_end, norm_dir); - VectorNormalize(norm_dir); - VectorMA(begin, -5.f, norm_dir, begin); - VectorMA(end, 5.f, norm_dir, end); + vec3_t norm_dir; + VectorCopy(to_end, norm_dir); + VectorNormalize(norm_dir); + VectorMA(begin, -5.f, norm_dir, begin); + VectorMA(end, 5.f, norm_dir, end); vec3_t color; cast_u32_to_f32_color(beam->skinnum, &beam->rgba, color, hdr_factor); vkpt_build_cylinder_light(light_list, num_lights, max_lights, bsp, begin, end, color, beam_width); - } + } } static void write_sprite_geometry(const float* view_matrix, const entity_t* entities, int entity_num) @@ -664,19 +656,19 @@ static void upload_geometry(VkCommandBuffer command_buffer) }; if (vertices.size) - vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.vertex_buffer, + vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.vertex_buffer.buffer, 1, &vertices); if (particle_colors.size) - vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.particle_color_buffer, + vkCmdCopyBuffer(command_buffer, 
transparency.host_buffer, transparency.particle_color_buffer.buffer, 1, &particle_colors); if (beam_colors.size) - vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.beam_color_buffer, + vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.beam_color_buffer.buffer, 1, &beam_colors); if (sprite_infos.size) - vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.sprite_info_buffer, + vkCmdCopyBuffer(command_buffer, transparency.host_buffer, transparency.sprite_info_buffer.buffer, 1, &sprite_infos); for (size_t i = 0; i < LENGTH(transparency.transfer_barriers); i++) @@ -688,266 +680,60 @@ static void upload_geometry(VkCommandBuffer command_buffer) transparency.transfer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; } - transparency.transfer_barriers[0].buffer = transparency.vertex_buffer; + transparency.transfer_barriers[0].buffer = transparency.vertex_buffer.buffer; transparency.transfer_barriers[0].size = vertices.size; - transparency.transfer_barriers[1].buffer = transparency.particle_color_buffer; + transparency.transfer_barriers[1].buffer = transparency.particle_color_buffer.buffer; transparency.transfer_barriers[1].size = particle_colors.size; - transparency.transfer_barriers[2].buffer = transparency.beam_color_buffer; + transparency.transfer_barriers[2].buffer = transparency.beam_color_buffer.buffer; transparency.transfer_barriers[2].size = beam_colors.size; - transparency.transfer_barriers[3].buffer = transparency.sprite_info_buffer; + transparency.transfer_barriers[3].buffer = transparency.sprite_info_buffer.buffer; transparency.transfer_barriers[3].size = sprite_infos.size; } -static void update_particle_blas(VkCommandBuffer command_buffer) -{ - if (transparency.particle_num == 0 && transparency.blas_particle_num == 0) - return; - - uint32_t barrier_count = 0; - VkBufferMemoryBarrier barriers[4]; - - for (uint32_t i = 0; i < LENGTH(transparency.transfer_barriers); i++) { - if 
(!transparency.transfer_barriers[i].size) - continue; - - barriers[barrier_count] = transparency.transfer_barriers[i]; - barrier_count++; - } - - vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, 0, NULL, barrier_count, barriers, 0, NULL); - - const VkGeometryTrianglesNV triangles = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV, - .vertexData = transparency.vertex_buffer, - .vertexCount = transparency.particle_num * 4, - .vertexStride = TR_POSITION_SIZE, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .indexData = transparency.index_buffer, - .indexCount = transparency.particle_num * 6, - .indexType = VK_INDEX_TYPE_UINT16 - }; - - const VkGeometryAABBNV aabbs = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV - }; - - const VkGeometryDataNV geometry_data = { - .triangles = triangles, - .aabbs = aabbs - }; - - const VkGeometryNV geometry = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, - .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV, - .geometry = geometry_data - }; - - const VkAccelerationStructureInfoNV info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - .flags = TR_BLAS_BUILD_FLAGS, - .instanceCount = 0, - .geometryCount = 1, - .pGeometries = &geometry - }; - - const VkBool32 update = transparency.blas_particle_num == transparency.particle_num ? 
VK_TRUE : VK_FALSE; - transparency.blas_particle_num = transparency.particle_num; - - qvkCmdBuildAccelerationStructureNV(command_buffer, &info, VK_NULL_HANDLE, 0, update, - transparency.particle_blas, transparency.particle_blas, transparency.scratch_buffer, 0); -} - -static void update_beam_blas(VkCommandBuffer command_buffer) -{ - if (transparency.beam_num == 0 && transparency.blas_beam_num == 0) - return; - - const VkGeometryTrianglesNV triangles = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV, - .vertexData = transparency.vertex_buffer, - .vertexCount = transparency.beam_num * 4, - .vertexStride = TR_POSITION_SIZE, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .vertexOffset = transparency.beam_vertex_device_offset, - .indexData = transparency.index_buffer, - .indexCount = transparency.beam_num * 6, - .indexType = VK_INDEX_TYPE_UINT16 - }; - - const VkGeometryAABBNV aabbs = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV - }; - - const VkGeometryDataNV geometry_data = { - .triangles = triangles, - .aabbs = aabbs - }; - - const VkGeometryNV geometry = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, - .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV, - .geometry = geometry_data - }; - - const VkAccelerationStructureInfoNV info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - .flags = TR_BLAS_BUILD_FLAGS, - .instanceCount = 0, - .geometryCount = 1, - .pGeometries = &geometry - }; - - - const VkBool32 update = transparency.blas_beam_num == transparency.beam_num ? 
VK_TRUE : VK_FALSE; - transparency.blas_beam_num = transparency.beam_num; - - qvkCmdBuildAccelerationStructureNV(command_buffer, &info, VK_NULL_HANDLE, 0, update, - transparency.beam_blas, transparency.beam_blas, transparency.scratch_buffer, - transparency.beam_scratch_device_offset); -} - -static void update_sprite_blas(VkCommandBuffer command_buffer) -{ - if (transparency.sprite_num == 0 && transparency.blas_sprite_num == 0) - return; - - const VkGeometryTrianglesNV triangles = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV, - .vertexData = transparency.vertex_buffer, - .vertexCount = transparency.sprite_num * 4, - .vertexStride = TR_POSITION_SIZE, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .vertexOffset = transparency.sprite_vertex_device_offset, - .indexData = transparency.index_buffer, - .indexCount = transparency.sprite_num * 6, - .indexType = VK_INDEX_TYPE_UINT16 - }; - - const VkGeometryAABBNV aabbs = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV - }; - - const VkGeometryDataNV geometry_data = { - .triangles = triangles, - .aabbs = aabbs - }; - - const VkGeometryNV geometry = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, - .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV, - .geometry = geometry_data - }; - - const VkAccelerationStructureInfoNV info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - .flags = TR_BLAS_BUILD_FLAGS, - .instanceCount = 0, - .geometryCount = 1, - .pGeometries = &geometry - }; - - const VkBool32 update = transparency.blas_sprite_num == transparency.sprite_num ? 
VK_TRUE : VK_FALSE; - transparency.blas_sprite_num = transparency.sprite_num; - - qvkCmdBuildAccelerationStructureNV(command_buffer, &info, VK_NULL_HANDLE, 0, update, - transparency.sprite_blas, transparency.sprite_blas, transparency.scratch_buffer, - transparency.sprite_scratch_device_offset); -} - -static size_t calculate_scratch_buffer_size(VkAccelerationStructureNV blas) -{ - VkAccelerationStructureMemoryRequirementsInfoNV scratch_requirements_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_BUILD_SCRATCH_NV, - .accelerationStructure = blas - }; - - VkMemoryRequirements2 build_memory_requirements = { 0 }; - build_memory_requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &scratch_requirements_info, - &build_memory_requirements); - - scratch_requirements_info.type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_UPDATE_SCRATCH_NV; - - VkMemoryRequirements2 update_memory_requirements = { 0 }; - update_memory_requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &scratch_requirements_info, - &update_memory_requirements); - - const uint64_t build_size = build_memory_requirements.memoryRequirements.size; - const uint64_t update_size = update_memory_requirements.memoryRequirements.size; - - return max(build_size, update_size); -} static void create_buffers() { - const uint64_t particle_scratch_size = calculate_scratch_buffer_size(transparency.particle_blas); - const uint64_t beam_scratch_size = calculate_scratch_buffer_size(transparency.beam_blas); - const uint64_t sprite_scratch_size = calculate_scratch_buffer_size(transparency.sprite_blas); - - transparency.beam_scratch_device_offset = particle_scratch_size; - transparency.sprite_scratch_device_offset = transparency.beam_scratch_device_offset + beam_scratch_size; - const 
VkBufferCreateInfo host_buffer_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = transparency.host_buffered_frame_num * transparency.host_frame_size, .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT }; - const VkBufferCreateInfo scratch_buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = particle_scratch_size + beam_scratch_size + sprite_scratch_size, - .usage = VK_BUFFER_USAGE_RAY_TRACING_BIT_NV - }; - - const VkBufferCreateInfo buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = transparency.host_frame_size, - .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - }; - - const VkBufferCreateInfo index_buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = TR_INDEX_MAX_NUM * sizeof(uint16_t), - .usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - }; - - const VkBufferCreateInfo particle_color_buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = TR_PARTICLE_MAX_NUM * TR_COLOR_SIZE, - .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - }; - - const VkBufferCreateInfo beam_color_buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = TR_BEAM_MAX_NUM * TR_COLOR_SIZE, - .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - }; - - const VkBufferCreateInfo sprite_info_buffer_info = { - .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, - .size = TR_SPRITE_MAX_NUM * TR_SPRITE_INFO_SIZE, - .usage = VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT - }; - _VK(vkCreateBuffer(qvk.device, &host_buffer_info, NULL, &transparency.host_buffer)); - _VK(vkCreateBuffer(qvk.device, &scratch_buffer_info, NULL, &transparency.scratch_buffer)); - _VK(vkCreateBuffer(qvk.device, &buffer_info, NULL, &transparency.vertex_buffer)); - _VK(vkCreateBuffer(qvk.device, &index_buffer_info, NULL, &transparency.index_buffer)); - 
_VK(vkCreateBuffer(qvk.device, &particle_color_buffer_info, NULL, &transparency.particle_color_buffer)); - _VK(vkCreateBuffer(qvk.device, &beam_color_buffer_info, NULL, &transparency.beam_color_buffer)); - _VK(vkCreateBuffer(qvk.device, &sprite_info_buffer_info, NULL, &transparency.sprite_info_buffer)); + + buffer_create( + &transparency.vertex_buffer, + transparency.host_frame_size, + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create( + &transparency.index_buffer, + TR_INDEX_MAX_NUM * sizeof(uint16_t), + VK_BUFFER_USAGE_INDEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create( + &transparency.particle_color_buffer, + TR_PARTICLE_MAX_NUM * TR_COLOR_SIZE, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create( + &transparency.beam_color_buffer, + TR_BEAM_MAX_NUM * TR_COLOR_SIZE, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create( + &transparency.sprite_info_buffer, + TR_SPRITE_MAX_NUM * TR_SPRITE_INFO_SIZE, + VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); } static qboolean allocate_and_bind_memory_to_buffers() { - // host buffer - VkMemoryRequirements host_buffer_requirements; vkGetBufferMemoryRequirements(qvk.device, transparency.host_buffer, &host_buffer_requirements); @@ -964,187 +750,42 @@ static qboolean allocate_and_bind_memory_to_buffers() _VK(vkAllocateMemory(qvk.device, &host_memory_allocate_info, NULL, &transparency.host_buffer_memory)); - // device buffers - - const 
VkBuffer device_buffers[] = { - transparency.scratch_buffer, - transparency.vertex_buffer, - transparency.index_buffer, - transparency.particle_color_buffer, - transparency.beam_color_buffer, - transparency.sprite_info_buffer - }; - - const VkMemoryPropertyFlags device_flags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - uint32_t memory_types[LENGTH(device_buffers)]; - VkMemoryRequirements requirements[LENGTH(device_buffers)]; - - VkMemoryAllocateInfo memory_allocate_info = { .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO }; - - for (size_t i = 0; i < LENGTH(device_buffers); i++) - { - vkGetBufferMemoryRequirements(qvk.device, device_buffers[i], &requirements[i]); - memory_types[i] = get_memory_type(requirements[i].memoryTypeBits, device_flags); - memory_allocate_info.allocationSize += requirements[i].size + ((i == 0) ? 0 : requirements[i].alignment); - } - - // If the buffers need different memory types, the code that does allocation and binding must be changed - for (size_t i = 1; i < LENGTH(device_buffers); i++) - { - if (memory_types[i] != memory_types[0]) - return qfalse; - } - - memory_allocate_info.memoryTypeIndex = memory_types[0]; - - -#ifdef VKPT_DEVICE_GROUPS - VkMemoryAllocateFlagsInfoKHR mem_alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR, - .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR, - .deviceMask = (1 << qvk.device_count) - 1 - }; - - if (qvk.device_count > 1) { - memory_allocate_info.pNext = &mem_alloc_flags; - } -#endif - - _VK(vkAllocateMemory(qvk.device, &memory_allocate_info, NULL, &transparency.device_buffer_memory)); - - // bind device buffers and a host buffer - VkBindBufferMemoryInfo bindings[LENGTH(device_buffers) + 1] = { 0 }; + VkBindBufferMemoryInfo bindings[1] = { 0 }; bindings[0].sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO; bindings[0].buffer = transparency.host_buffer; bindings[0].memory = transparency.host_buffer_memory; bindings[0].memoryOffset = 0; - size_t offset = 0; - for (size_t i = 0; i < 
LENGTH(device_buffers); i++) - { - VkBindBufferMemoryInfo* binding = bindings + i + 1; - - binding->sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO; - binding->buffer = device_buffers[i]; - binding->memory = transparency.device_buffer_memory; - - offset = align(offset, requirements[i].alignment); - binding->memoryOffset = offset; - offset += requirements[i].size; - } - _VK(vkBindBufferMemory2(qvk.device, LENGTH(bindings), bindings)); const size_t host_buffer_size = transparency.host_buffered_frame_num * transparency.host_frame_size; _VK(vkMapMemory(qvk.device, transparency.host_buffer_memory, 0, host_buffer_size, 0, &transparency.mapped_host_buffer)); - + return qtrue; } -static void create_blas() -{ - const VkGeometryTrianglesNV geometry_triangles = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_TRIANGLES_NV, - .vertexCount = TR_VERTEX_MAX_NUM, - .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, - .indexCount = TR_INDEX_MAX_NUM, - .indexType = VK_INDEX_TYPE_UINT16 - }; - - const VkGeometryAABBNV geometry_aabbs = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_AABB_NV - }; - - const VkGeometryDataNV geometry_data = { - .triangles = geometry_triangles, - .aabbs = geometry_aabbs - }; - - const VkGeometryNV geometry = { - .sType = VK_STRUCTURE_TYPE_GEOMETRY_NV, - .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_NV, - .geometry = geometry_data - }; - - const VkAccelerationStructureInfoNV info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_NV, - .flags = TR_BLAS_BUILD_FLAGS, - .geometryCount = 1, - .pGeometries = &geometry - }; - - const VkAccelerationStructureCreateInfoNV blas_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_NV, - .info = info - }; - - _VK(qvkCreateAccelerationStructureNV(qvk.device, &blas_info, NULL, &transparency.particle_blas)); - _VK(qvkCreateAccelerationStructureNV(qvk.device, &blas_info, NULL, &transparency.beam_blas)); - _VK(qvkCreateAccelerationStructureNV(qvk.device, 
&blas_info, NULL, &transparency.sprite_blas)); -} - -static void allocate_and_bind_memory_to_blas() -{ - const VkAccelerationStructureMemoryRequirementsInfoNV blas_requirements_info = { - .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_INFO_NV, - .type = VK_ACCELERATION_STRUCTURE_MEMORY_REQUIREMENTS_TYPE_OBJECT_NV, - .accelerationStructure = transparency.particle_blas - }; - - VkMemoryRequirements2 blas_memory_requirements = { 0 }; - blas_memory_requirements.sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2; - qvkGetAccelerationStructureMemoryRequirementsNV(qvk.device, &blas_requirements_info, - &blas_memory_requirements); - - VkMemoryRequirements mem_req = blas_memory_requirements.memoryRequirements; - size_t beam_memory_offset = align(mem_req.size, mem_req.alignment); - mem_req.size += beam_memory_offset; - - size_t sprite_memory_offset = align(mem_req.size, mem_req.alignment); - mem_req.size += sprite_memory_offset; - - _VK(allocate_gpu_memory(mem_req, &transparency.device_blas_memory)); - - VkBindAccelerationStructureMemoryInfoNV bindings[3] = { 0 }; - - bindings[0].sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV; - bindings[0].accelerationStructure = transparency.particle_blas; - bindings[0].memory = transparency.device_blas_memory; - bindings[1].sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV; - bindings[1].accelerationStructure = transparency.beam_blas; - bindings[1].memory = transparency.device_blas_memory; - bindings[1].memoryOffset = beam_memory_offset; - bindings[2].sType = VK_STRUCTURE_TYPE_BIND_ACCELERATION_STRUCTURE_MEMORY_INFO_NV; - bindings[2].accelerationStructure = transparency.sprite_blas; - bindings[2].memory = transparency.device_blas_memory; - bindings[2].memoryOffset = sprite_memory_offset; - - _VK(qvkBindAccelerationStructureMemoryNV(qvk.device, LENGTH(bindings), bindings)); -} - static void create_buffer_views() { const VkBufferViewCreateInfo particle_color_view_info = { .sType = 
VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .buffer = transparency.particle_color_buffer, + .buffer = transparency.particle_color_buffer.buffer, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .range = TR_PARTICLE_MAX_NUM * TR_COLOR_SIZE }; const VkBufferViewCreateInfo beam_color_view_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .buffer = transparency.beam_color_buffer, + .buffer = transparency.beam_color_buffer.buffer, .format = VK_FORMAT_R32G32B32A32_SFLOAT, .range = TR_BEAM_MAX_NUM * TR_COLOR_SIZE }; const VkBufferViewCreateInfo sprite_info_view_info = { .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO, - .buffer = transparency.sprite_info_buffer, + .buffer = transparency.sprite_info_buffer.buffer, .format = VK_FORMAT_R32G32_UINT, .range = TR_SPRITE_MAX_NUM * TR_SPRITE_INFO_SIZE }; @@ -1184,7 +825,7 @@ static void fill_index_buffer() .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = transparency.index_buffer, + .buffer = transparency.index_buffer.buffer, .size = VK_WHOLE_SIZE }; @@ -1195,7 +836,7 @@ static void fill_index_buffer() .size = TR_INDEX_MAX_NUM * sizeof(uint16_t) }; - vkCmdCopyBuffer(cmd_buf, transparency.host_buffer, transparency.index_buffer, 1, ®ion); + vkCmdCopyBuffer(cmd_buf, transparency.host_buffer, transparency.index_buffer.buffer, 1, ®ion); const VkBufferMemoryBarrier post_barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, @@ -1203,7 +844,7 @@ static void fill_index_buffer() .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .buffer = transparency.index_buffer, + .buffer = transparency.index_buffer.buffer, .size = VK_WHOLE_SIZE }; diff --git a/src/refresh/vkpt/vertex_buffer.c b/src/refresh/vkpt/vertex_buffer.c index 003b4310e..e377c025f 100644 --- a/src/refresh/vkpt/vertex_buffer.c +++ b/src/refresh/vkpt/vertex_buffer.c @@ 
-56,7 +56,7 @@ vkpt_vertex_buffer_bsp_upload_staging() BUFFER_BARRIER(cmd_buf, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_NV, + .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, .buffer = qvk.buf_vertex_bsp.buffer, .offset = 0, .size = VK_WHOLE_SIZE, @@ -664,7 +664,7 @@ vkpt_vertex_buffer_create() _VK(vkCreateDescriptorSetLayout(qvk.device, &layout_info, NULL, &qvk.desc_set_layout_vertex_buffer)); buffer_create(&qvk.buf_vertex_bsp, sizeof(BspVertexBuffer), - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); buffer_create(&qvk.buf_vertex_bsp_staging, sizeof(BspVertexBuffer), @@ -672,7 +672,7 @@ vkpt_vertex_buffer_create() VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); buffer_create(&qvk.buf_vertex_model_dynamic, sizeof(ModelDynamicVertexBuffer), - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); buffer_create(&qvk.buf_light, sizeof(LightBuffer), diff --git a/src/refresh/vkpt/vk_util.c b/src/refresh/vkpt/vk_util.c index 4c215e8ea..983f35383 100644 --- a/src/refresh/vkpt/vk_util.c +++ b/src/refresh/vkpt/vk_util.c @@ -64,6 +64,9 @@ buffer_create( assert(buf); VkResult result = VK_SUCCESS; + if (!qvk.use_khr_ray_tracing) + usage &= ~VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + VkBufferCreateInfo buf_create_info = { .sType = 
VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .size = size, @@ -91,18 +94,21 @@ buffer_create( .memoryTypeIndex = get_memory_type(mem_reqs.memoryTypeBits, mem_properties) }; -#ifdef VKPT_DEVICE_GROUPS - VkMemoryAllocateFlagsInfoKHR mem_alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR, - .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR, - .deviceMask = (1 << qvk.device_count) - 1 + VkMemoryAllocateFlagsInfo mem_alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) ? VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT : 0, + .deviceMask = 0 }; +#ifdef VKPT_DEVICE_GROUPS if (qvk.device_count > 1 && !(mem_properties & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { - mem_alloc_info.pNext = &mem_alloc_flags; + mem_alloc_flags.flags |= VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT; + mem_alloc_flags.deviceMask = (1 << qvk.device_count) - 1; } #endif + mem_alloc_info.pNext = &mem_alloc_flags; + result = vkAllocateMemory(qvk.device, &mem_alloc_info, NULL, &buf->memory); if(result != VK_SUCCESS) { goto fail_mem_alloc; @@ -115,6 +121,16 @@ buffer_create( goto fail_bind_buf_memory; } + if (usage & VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT) + { + buf->address = get_buffer_device_address(buf->buffer); + assert(buf->address); + } + else + { + buf->address = 0; + } + return VK_SUCCESS; fail_bind_buf_memory: @@ -139,6 +155,7 @@ buffer_destroy(BufferResource_t *buf) buf->buffer = VK_NULL_HANDLE; buf->memory = VK_NULL_HANDLE; buf->size = 0; + buf->address = 0; return VK_SUCCESS; } @@ -163,6 +180,17 @@ buffer_unmap(BufferResource_t *buf) vkUnmapMemory(qvk.device, buf->memory); } +VkDeviceAddress +get_buffer_device_address(VkBuffer buffer) +{ + VkBufferDeviceAddressInfo address_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO, + .buffer = buffer + }; + + return vkGetBufferDeviceAddress(qvk.device, &address_info); +} + const char * qvk_format_to_string(VkFormat format) { @@ -408,9 +436,9 @@ VkResult 
allocate_gpu_memory(VkMemoryRequirements mem_req, VkDeviceMemory* pMemo }; #ifdef VKPT_DEVICE_GROUPS - VkMemoryAllocateFlagsInfoKHR mem_alloc_flags = { - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO_KHR, - .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT_KHR, + VkMemoryAllocateFlagsInfo mem_alloc_flags = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO, + .flags = VK_MEMORY_ALLOCATE_DEVICE_MASK_BIT, .deviceMask = (1 << qvk.device_count) - 1 }; @@ -429,9 +457,9 @@ void set_current_gpu(VkCommandBuffer cmd_buf, int gpu_index) if (qvk.device_count > 1) { if(gpu_index == ALL_GPUS) - qvkCmdSetDeviceMaskKHR(cmd_buf, (1 << qvk.device_count) - 1); + vkCmdSetDeviceMask(cmd_buf, (1 << qvk.device_count) - 1); else - qvkCmdSetDeviceMaskKHR(cmd_buf, 1 << gpu_index); + vkCmdSetDeviceMask(cmd_buf, 1 << gpu_index); } #endif } @@ -494,8 +522,8 @@ const char *qvk_result_to_string(VkResult result) return "VK_ERROR_VALIDATION_FAILED_EXT"; case VK_ERROR_INVALID_SHADER_NV: return "VK_ERROR_INVALID_SHADER_NV"; - case VK_ERROR_FRAGMENTATION_EXT: - return "VK_ERROR_FRAGMENTATION_EXT"; + case VK_ERROR_FRAGMENTATION: + return "VK_ERROR_FRAGMENTATION"; case VK_ERROR_NOT_PERMITTED_EXT: return "VK_ERROR_NOT_PERMITTED_EXT"; } diff --git a/src/refresh/vkpt/vk_util.h b/src/refresh/vkpt/vk_util.h index ef8a37a70..f7bdc32d5 100644 --- a/src/refresh/vkpt/vk_util.h +++ b/src/refresh/vkpt/vk_util.h @@ -33,6 +33,7 @@ char * sgets(char * str, int num, char const ** input); typedef struct BufferResource_s { VkBuffer buffer; VkDeviceMemory memory; + VkDeviceAddress address; size_t size; int is_mapped; } BufferResource_t; @@ -49,6 +50,8 @@ void buffer_unmap(BufferResource_t *buf); void *buffer_map(BufferResource_t *buf); void buffer_unmap(BufferResource_t *buf); +VkDeviceAddress get_buffer_device_address(VkBuffer buffer); + uint32_t get_memory_type(uint32_t mem_req_type_bits, VkMemoryPropertyFlags mem_prop); diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 
f3d553c0a..32723ce0e 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -72,17 +72,6 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_STRETCH_PIC_FRAG) \ SHADER_MODULE_DO(QVK_MOD_FINAL_BLIT_VERT) \ SHADER_MODULE_DO(QVK_MOD_FINAL_BLIT_LANCZOS_FRAG) \ - SHADER_MODULE_DO(QVK_MOD_PRIMARY_RAYS_RGEN) \ - SHADER_MODULE_DO(QVK_MOD_REFLECT_REFRACT_RGEN) \ - SHADER_MODULE_DO(QVK_MOD_DIRECT_LIGHTING_RGEN) \ - SHADER_MODULE_DO(QVK_MOD_INDIRECT_LIGHTING_RGEN) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_RCHIT) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_PARTICLE_RAHIT) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_BEAM_RAHIT) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_RMISS) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_SHADOW_RMISS) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_EXPLOSION_RAHIT) \ - SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_SPRITE_RAHIT) \ SHADER_MODULE_DO(QVK_MOD_INSTANCE_GEOMETRY_COMP) \ SHADER_MODULE_DO(QVK_MOD_ANIMATE_MATERIALS_COMP) \ SHADER_MODULE_DO(QVK_MOD_ASVGF_GRADIENT_IMG_COMP) \ @@ -106,11 +95,18 @@ with this program; if not, write to the Free Software Foundation, Inc., SHADER_MODULE_DO(QVK_MOD_SHADOW_MAP_VERT) \ SHADER_MODULE_DO(QVK_MOD_COMPOSITING_COMP) \ -#ifndef VKPT_SHADER_DIR -#define VKPT_SHADER_DIR "shader_vkpt" -#endif - -#define SHADER_PATH_TEMPLATE VKPT_SHADER_DIR "/%s.spv" +#define LIST_RT_SHADER_MODULES \ + SHADER_MODULE_DO(QVK_MOD_PRIMARY_RAYS_RGEN) \ + SHADER_MODULE_DO(QVK_MOD_REFLECT_REFRACT_RGEN) \ + SHADER_MODULE_DO(QVK_MOD_DIRECT_LIGHTING_RGEN) \ + SHADER_MODULE_DO(QVK_MOD_INDIRECT_LIGHTING_RGEN) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_RCHIT) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_PARTICLE_RAHIT) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_BEAM_RAHIT) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_RMISS) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_SHADOW_RMISS) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_EXPLOSION_RAHIT) \ + SHADER_MODULE_DO(QVK_MOD_PATH_TRACER_SPRITE_RAHIT) \ #define 
SHADER_STAGE(_module, _stage) \ { \ @@ -132,6 +128,7 @@ with this program; if not, write to the Free Software Foundation, Inc., enum QVK_SHADER_MODULES { #define SHADER_MODULE_DO(a) a, LIST_SHADER_MODULES + LIST_RT_SHADER_MODULES #undef SHADER_MODULE_DO NUM_QVK_SHADER_MODULES }; @@ -191,6 +188,8 @@ typedef struct QVK_s { VkImage swap_chain_images[MAX_SWAPCHAIN_IMAGES]; VkImageView swap_chain_image_views[MAX_SWAPCHAIN_IMAGES]; + qboolean use_khr_ray_tracing; + cmd_buf_group_t cmd_buffers_graphics; cmd_buf_group_t cmd_buffers_compute; cmd_buf_group_t cmd_buffers_transfer; @@ -280,43 +279,44 @@ typedef struct QVK_s { extern QVK_t qvk; -#define _VK_INST_EXTENSION_LIST \ - _VK_INST_EXTENSION_DO(vkCmdBeginDebugUtilsLabelEXT) \ - _VK_INST_EXTENSION_DO(vkCmdEndDebugUtilsLabelEXT) \ - _VK_INST_EXTENSION_DO(vkEnumeratePhysicalDeviceGroupsKHR) - -#define _VK_INST_EXTENSION_DO(a) extern PFN_##a q##a; -_VK_INST_EXTENSION_LIST -#undef _VK_INST_EXTENSION_DO - -#define _VK_EXTENSION_LIST \ - _VK_EXTENSION_DO(vkCreateAccelerationStructureNV) \ - _VK_EXTENSION_DO(vkCreateAccelerationStructureNV) \ - _VK_EXTENSION_DO(vkDestroyAccelerationStructureNV) \ - _VK_EXTENSION_DO(vkGetAccelerationStructureMemoryRequirementsNV) \ - _VK_EXTENSION_DO(vkBindAccelerationStructureMemoryNV) \ - _VK_EXTENSION_DO(vkCmdBuildAccelerationStructureNV) \ - _VK_EXTENSION_DO(vkCmdCopyAccelerationStructureNV) \ - _VK_EXTENSION_DO(vkCmdTraceRaysNV) \ - _VK_EXTENSION_DO(vkCreateRayTracingPipelinesNV) \ - _VK_EXTENSION_DO(vkGetRayTracingShaderGroupHandlesNV) \ - _VK_EXTENSION_DO(vkGetAccelerationStructureHandleNV) \ - _VK_EXTENSION_DO(vkCmdWriteAccelerationStructuresPropertiesNV) \ - _VK_EXTENSION_DO(vkCompileDeferredNV) \ - _VK_EXTENSION_DO(vkDebugMarkerSetObjectNameEXT) \ - _VK_EXTENSION_DO(vkGetDeviceGroupPeerMemoryFeaturesKHR) \ - _VK_EXTENSION_DO(vkCmdSetDeviceMaskKHR) \ - _VK_EXTENSION_DO(vkCmdDispatchBaseKHR) \ - _VK_EXTENSION_DO(vkGetDeviceGroupPresentCapabilitiesKHR) \ - 
_VK_EXTENSION_DO(vkGetDeviceGroupSurfacePresentModesKHR) \ - _VK_EXTENSION_DO(vkGetPhysicalDevicePresentRectanglesKHR) \ - _VK_EXTENSION_DO(vkAcquireNextImage2KHR) \ - _VK_EXTENSION_DO(vkBindImageMemory2KHR) - - -#define _VK_EXTENSION_DO(a) extern PFN_##a q##a; -_VK_EXTENSION_LIST -#undef _VK_EXTENSION_DO +#define LIST_EXTENSIONS_KHR \ + VK_EXTENSION_DO(vkCreateAccelerationStructureKHR) \ + VK_EXTENSION_DO(vkDestroyAccelerationStructureKHR) \ + VK_EXTENSION_DO(vkCmdBuildAccelerationStructuresKHR) \ + VK_EXTENSION_DO(vkCmdCopyAccelerationStructureKHR) \ + VK_EXTENSION_DO(vkCmdTraceRaysKHR) \ + VK_EXTENSION_DO(vkCreateRayTracingPipelinesKHR) \ + VK_EXTENSION_DO(vkGetRayTracingShaderGroupHandlesKHR) \ + VK_EXTENSION_DO(vkGetAccelerationStructureDeviceAddressKHR) \ + VK_EXTENSION_DO(vkCmdWriteAccelerationStructuresPropertiesKHR) \ + VK_EXTENSION_DO(vkGetAccelerationStructureBuildSizesKHR) \ + +#define LIST_EXTENSIONS_NV \ + VK_EXTENSION_DO(vkCreateAccelerationStructureNV) \ + VK_EXTENSION_DO(vkDestroyAccelerationStructureNV) \ + VK_EXTENSION_DO(vkGetAccelerationStructureMemoryRequirementsNV) \ + VK_EXTENSION_DO(vkBindAccelerationStructureMemoryNV) \ + VK_EXTENSION_DO(vkCmdBuildAccelerationStructureNV) \ + VK_EXTENSION_DO(vkCmdCopyAccelerationStructureNV) \ + VK_EXTENSION_DO(vkCmdTraceRaysNV) \ + VK_EXTENSION_DO(vkCreateRayTracingPipelinesNV) \ + VK_EXTENSION_DO(vkGetRayTracingShaderGroupHandlesNV) \ + VK_EXTENSION_DO(vkGetAccelerationStructureHandleNV) \ + VK_EXTENSION_DO(vkCmdWriteAccelerationStructuresPropertiesNV) \ + +#define LIST_EXTENSIONS_DEBUG \ + VK_EXTENSION_DO(vkDebugMarkerSetObjectNameEXT) \ + +#define LIST_EXTENSIONS_INSTANCE \ + VK_EXTENSION_DO(vkCmdBeginDebugUtilsLabelEXT) \ + VK_EXTENSION_DO(vkCmdEndDebugUtilsLabelEXT) + +#define VK_EXTENSION_DO(a) extern PFN_##a q##a; +LIST_EXTENSIONS_KHR +LIST_EXTENSIONS_NV +LIST_EXTENSIONS_DEBUG +LIST_EXTENSIONS_INSTANCE +#undef VK_EXTENSION_DO #define MAX_SKY_CLUSTERS 1024 @@ -604,8 +604,8 @@ VkResult 
vkpt_pt_create_pipelines(); VkResult vkpt_pt_destroy_pipelines(); VkResult vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world, qboolean weapon_left_handed); -VkResult vkpt_pt_create_static(VkBuffer vertex_buffer, size_t buffer_offset, int num_vertices, int num_vertices_transparent, int num_vertices_sky, int num_vertices_custom_sky); -VkResult vkpt_pt_destroy_static(); +VkResult vkpt_pt_create_static(int num_vertices, int num_vertices_transparent, int num_vertices_sky, int num_vertices_custom_sky); +void vkpt_pt_destroy_static(); VkResult vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf); VkResult vkpt_pt_trace_reflections(VkCommandBuffer cmd_buf, int bounce); VkResult vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays); @@ -655,11 +655,23 @@ void destroy_transparency(); void update_transparency(VkCommandBuffer command_buffer, const float* view_matrix, const particle_t* particles, int particle_num, const entity_t* entities, int entity_num); -void build_transparency_blas(VkCommandBuffer cmd_buf); +typedef enum { + VKPT_TRANSPARENCY_PARTICLES, + VKPT_TRANSPARENCY_BEAMS, + VKPT_TRANSPARENCY_SPRITES, + + VKPT_TRANSPARENCY_COUNT +} vkpt_transparency_t; + +void vkpt_get_transparency_buffers( + vkpt_transparency_t ttype, + BufferResource_t** vertex_buffer, + uint64_t* vertex_offset, + BufferResource_t** index_buffer, + uint64_t* index_offset, + uint32_t* num_vertices, + uint32_t* num_indices); -VkAccelerationStructureNV get_transparency_particle_blas(); -VkAccelerationStructureNV get_transparency_beam_blas(); -VkAccelerationStructureNV get_transparency_sprite_blas(); VkBufferView get_transparency_particle_color_buffer_view(); VkBufferView get_transparency_beam_color_buffer_view(); VkBufferView get_transparency_sprite_info_buffer_view(); @@ -723,8 +735,6 @@ typedef struct { float alpha_scale; } drawStatic_t; -extern drawStatic_t draw; - static inline void begin_perf_marker(VkCommandBuffer command_buffer, int index, const 
char* name) { _VK(vkpt_profiler_query(command_buffer, index, PROFILER_START)); diff --git a/vulkan-1.lib b/vulkan-1.lib index 89f2cd3ecc632bd888562c5c9817caa8354131e3..adf8b34c736678ffb74f3b2909b82e6583d05438 100644 GIT binary patch literal 52366 zcmeHQX^>sTbw0=#6QU4e6O4BmV=UVki$NL*Oo%iR0)<4W<%<)U1vl*_+TDJqp8BwsJ*-qY)Qmp7#HBdR)@xu;K` zulw}r<(}@o^XA^n?Q%EK z>faKbHl1kABBINGZRo~Nh!ovD&(JeZ5-Hk#pP^?TB2x51kD(V2BU1F-bB3PSK&0rQ zRfaD9kVw(>ry08Q2qHyySh{luo^n=yB*!v<(=FUOL*)&h12sF1^Fh4X}}- zw`Uo8>^34r@4+^TuI@K<>sBH~m(DhHJ<6u&G2~P94s5UJtv3x_Ig?1y&2JleZ6lGQ z3m-6aEq*H6vbUk@_90Sq*~f-%+z;&qdSX{Ym#;)Sf^NLr(AB>|xj?t=X6TCfM2c?u zt)bWU1RiK-kD<#pKrd(u^eeh)*w7VEBOmBy*jdq4>kZukUr=<>?+k4nB~o`JHpUqXahwzY%_G_4X_L7mU)I=Ls=AU*<|RtEzk>k^CLq; z_+8OGGYt)cujuv<484gmDH=l=6kYHKL!0&`Qgr?UhRy?D(B1J=c8+zjaj0d2nE;sb#1|mg|K4R#RE+R#jpk9g|J;=}{ z&*2$#&0<5>LPpW68x7snL!{{i`~+=3(9jF7V0;5TJJZmMD4U|^4>R=AZ!k`Po<{i; zJ%h3-dipj)BRetPfJW~#bpP*&6m5Lp(Ao#k{-DP<8@g|RNYOhV8G3jzk)o^q&Csp; zVq5{;58Ekv;8aV$$4}7nFBuxVgGkZErx@A_+ben(by4)x?uMTDcZ?UHhtZ~r9(l~r zwv8AELC?)HbQRq~~m#%sfUbqW|Px2Qfg-W*vc1bq%m z9yvTQRBuA*(t&kjjlSC8qWWZ`uRaFd$t2EuRDImdwHT$FXzI-Av~=!~=1^^XX>(+> z-W+cvwD>YfZA*q~tLuw>@yRto!Qz5Rz4`Kq)TD-=PShvri)%xT!F57BQnI*UQtxr~ zp^;__m4S@V#@>QL^)2Y{Z^8v&tbi+W7+-PaJswLN&ozmK-`Yo(u|8S|xz8+BxFwhD z$hG73;jzZZuw%`(!~&fLyM@NIP@s>?cx-K5y_qkcFB0f7u)dh+r*Q$+gSIpR+O16E zN(Rg6Gf<3v$W3b$U!NHL}3_!*;q0VEl z+~Q`vlo1b{eq3#|P*MKQsV5E(iFEnI5{m~X#s-jawBB#@^wx$~>rp6`!a0`=Pu2z- z{i+VJ?Ac`2QM8W1UN|xY`w!LeYM^_ev0aeHxqBMJ{XG+_R@Iw@2?Cas)nlF`Dmg5P zGD)Sq_0uOB&AKX-njUhgLW58oDo7>z`In8>nq#*06IzlYLYF&RnH;(Z?aUK7amLU+ zL0R4ibKejgmNetx;<}v1%@a%5(T_!(N0#ZWkJZPuglZA_#1d#FoxW@wQ?qS0$zVCv zPho{Zm;208rqs*o=t~15&2j-!a#+$9w@gs|UoT2Z7Rwr~M@|}VmrpEVv$BZu$TFy@ zPM0;643=AF85OGWz4f9BZE-kb6Cx>Q*Kc{#vqJt;+^#{TOdY`X}@xXrLS> z%iD@MX@z=+%Q=j)rH#@0U}Lzxu+}$F_BEHwIV^^oSb(@}&I8*z35#muH8fR}&3TTi zH&@r4Txw8>aXE*n%TY}X8I8U%&Z=^yQrw)F3P&gKeqxPLW(*QIpJxjo`5cyX?OZrk zl+AfOT}7ripK}?N%W6ZTgLSpCcAr_w>L|rL2Fsb;NbGAiM#o2*7|4opq|-RJRT|Hp 
zN#y)Kf-@ABS~Z*V1Zr)jIG=OrA=uMs=Q)qdIe76{Cc`$$XA%jVPc0~eJP{_#pE#=4 zdqIvUoAVgEEpFC^>MQkPt?&cMV`=DC@9;9IaAe73d856zK8#UmX>F|RCxXSYR<}qg zR0|$UV?Z$61qeT&BM(mu)|!p+btrx^iSwFLspV{KxZiOU$zy3-E}2vmOfp$s`-e=h z&LLSW%MM`E%)BT)!%N_N$_c!r$u$us%lFh*H-?=Iuv3J|@<$H$`_ci6Wo3Pfi9Nz( z`GrIMYVFBmr}8(CBP7vL)RZ_n(o~Zx1SP|jTh`YY8ynH%m=@z_SL#^IT_-pC#|H{^ zCC8OqR39B5=xNln(`t<+$Cd0Ego7^}(Gf7+uT42UwwuT38krdG_w^f|E4>WSguccg z+5|5hc%vIzQxrdv6wP}wdPg*8nGns_TgMw5zY-Y3n!=O`BA;s6U;wi1>+!u@OE5;P zE&Ru3kxWniBn0F*gEgT7ciAG(qGnAmB!~L-Z(fQ9s`neKDgkDMa(LbBv(9J*4JVYW zudVk(Pnf|;kog*8qqXrq>zj-CDI(#iwUPx_glx$6@@U!RBYr_$w#Z}VG%4B!(@U7$D|A&CK!Ulz?UWS)eC z9-D=XcotIXWm^l`&Ly6O6p!LPwPv$n7Lh~6(9TRDujd^73Td54O-$xlNMWuKGR2dS z&}A-OMU5_xC#3Zdx4bcg)e9E8M%v>EY2~biYAChuzN&>{DI(`mMC-hHz-0+pZ3(fk zG1H(jhhPf%b8vXy2Ne`}#s z2({8D@w?hDsupdf+ZLmRS}D~^W%aA-A$57xv6 zSfb&haxJwE)`n3o-kWTpIusWA8D9z6E?V!m%+jhYr9zF$fg(k;n!;4pU>rq=nc79! zoCnsIC6pbmuuV>Y^*A6)jY-6ymrZtcAduFq_ZQA+Eol_Q(%jr9H--=Ca>iSe)8})p z!27hIb==xGb|dqnZ%&*sf-OuexYa%sM*Syl0c2af@c~V+W zbgr<&Vp;g1+11s3a%r9SZn=vYbLT}^mwEfJo5V6=Ay`6I4b1A@x2HB{UoagGOQM9L zs7NQXj^)k9kX<#~_@U2X$-<(#pV~4S3s??I+F8LwU)MV;F+zGT*u@i|!^@5+hAb=s-XUx4b7 zSNLc`Oz3py8>_DuVIG{e7)-8zqjId(Ur*Yld6dn0OcSGx><)?vX)Lc}e+F@V7`}x7 z>#?nzQPfI;HAS`#PHv13h$6L;U`_BT*UG#QE0(XRmzTI;OTF%Lx0UjUv_P;$a969P z%5-w5LoX*~F9_7cC4?qD!kKB*4C#>}tc8rw=C+E?*VbYtA<3GJtCo=nHS&eLLK-^& zZs9esU%h!! 
zZE!;M`D7C3Re`x8vsy-b43=BgSd9f#AqhdB!;*kBQ=l0{)w#>M-n&aA1o=)Ft(`uB zBLbu9{%RI<`44s>xYYSvQf<3lCb|z#$f=xbvZAQWC9V7a~1w{L9LYdFU@8{t@2j~=> zvEK~dIMP1B&(m;5{&%1qkiT>t(SLxJpNaacCpvsB^5c0L&d~oJ)IAQ}___29qMvOh z`p>grhY8?=?n2sb(0@3v4mcM!L;9j~kbXMKd=~CLfY%NC?L^+~;BNwd)dt*|LE2{M zSr41ht0?Eos2}bNti;bIey;r$*w>&fk^ZkcV0+Z{fHz?4r(n}3ad+TRqOM1PbqUcy z`29KjTnzaxw9`iD>$wZ}DL@Bq$Gs2u+stR7??w3C^JuG=pzmqudj`IMpF80<_l?3H z_u~%1M%WGY-wgj5xDU34KP-Nj=-<$0``(K3LFUv4U>DF!&~?WkYzyE0!&czHA9qLj z{{0Em6?q;*`8IBYUp)st@_&rF|7HWQQSY6w^@p(aFzPnrebgB^lknT$E6fLR2kC9x zE87M4=4cwdOxM#d=%@4*`X$Yv$LQPiIGs-y(c83!2B}6vG)ZUEnRFJ7(-|~DYw1F2 z(vvhs!!$yt)B7|^@6jf@gx;k~X+2#`@6bltLg&&2w1HMpomSHmbPn}X9}UoH)S#8L zj?Saa)J^l~=d_rPr(@|A`g6LKZl{Cjd$gR6q{Ha1XjfW5N7Es661_!#L(kHI^w)F; zT}yY;Tso9ap%dsh`W5X(Kc<;XHl0kz&~x-7nopO}b+j+7pfA$i z^cVCcx|{By-RK+ichpN?rGKRT=xO>VdY-n?3-ls=i+)Xe(qGbcdYx{eopc-BOta_* z^c{MN_NO=KaQZXaK{wHBbR#`XkJ2Ob1-h4Zr~Bzo=uMhV57JAt2i-@1N)OO?=^?s` zuAr;wO1g$Fr@Lq&&7q&s-_ut5CVh<-(E)S=?L%LtQ|WJMDRt5J>7VIf`UiT3z7Drt zMn9wv{=a?xkGId~5L@~*_kDQI%)Y4ji7L8UWVA=;o%17OL2~(Wf(^?s+05!iuFO#t zKk!lJ38ExiM3qG^UwsspUxw!p$`BiutcJPeBDcT}lU!v0N*F-pRK*adAtWA>c+1rR z13!84A-++s_O=t7&HiS%D1c7N?`@ao4P?H8%jW{TMBN^;N@}lFGmBFJUJis$%!y~L z=JXb=NLO!U`XEJwrvpq8K1?1x$RP`rKN%v|_laB#l6rxNmLQo4?=ad563lt8NhUb% znA-7$FUC(xi1#dsAlQz6_SndIfaLbLUZTg3E))$fu_0d`kPxkl(WQ-ZSn8CQn@OI} z@>5h$OP|4V6N1tLJ9W4$emJe&vs}@mS3tA*QzyELlXRlosbe$o;~uF)6Z#6Md1ox` zD45Q%b`u7W;nSi#ItxZj1hRK&A@J0c&^8HUDr$W zeAlLi&TA7Q-fL5QCw18=FJ-B~O*tXvr<_)vzVhr^95Rq2lL@4$v0Pt^@ z$u@K|6{k#+^<)XBc|9fGA*&3WCz(;IozmYHO>2&dXVjZs(~gOJ$&AwM@r0nk+5suf)6hm2;a?O*8>nTzqn*(H|({GO&=L?5{2)V98P zg0H2RK2~^B?jE`JUKOu39#-VheA@uBtu)_$$A*igdBGB0$G_r>beZJ52kL}!;TER3 zFK}91kLcxsSMyLJ`z;ABBVH)oys4uDsbg@lkK43+bSNTM7FZb$ox-bz;ufx=&P4b- z7WUMQ&sJ?!UdwjOueI2{jnc+2j7+eu6TMp-p3wx9zk8t$r}*0w0mq(T5nB?`%R@0H zXNAiKm56LT7!?ra9*yEzF?YMcu-yxS!F@I@L@&kBW?Mm0wW7QugA zU^na^vTVcqp?)nWg8cef#PLm41jFiKf8db5&gTa6HA{~tX6PuNiv}>-AybC#wG?`l 
z7qn-#3Co9MhQ>CN0XKqGQ1=a1O{bP$U%ZC6z&VFQE*-bApO&l~>r!cxJZln@*bz&Q{DS9((>b9W037*}tqjHAweiF2Ui`4s zsRM__WBGz>hSqGToO2azrK_ojc4ZgyBqEE<3q#_ZQVa+>uYPju#qWKEdiLx1=WRS| zIDV`VM5vI9P_lH_(PA7oWSK-*yGSk~g#|~32n90;Vi#$=ERxOO2d<0zoQ0#o7u5GY z%(Jze<+-kAnWgVomkRjOr-H=5sR{|}RrZ-3#6Gfz@T21*mdTzVAKeNGU$8>#4Db$* z!|vg+3;(cu=^&O>xK2w492OU| z(qZ91$Pe8L0k(836dVz2LlcL8MQ-=i%m#jw>1yCj1$(1kRxwPNxr~x(9c5@&pHVeE z$Cxd05Z0_0op7$m7N)8p1g}X(Yc!T=PsWO38K>mntxZ>LZZ2GeVcS5Y84cL_Vu?_# z5Hs~r14rsJlDR6WA|nDJJ_BpaicZD{r3J%UDjkM3m&P7 z71403E`}b@LzPXw0JYki*v9uhRSKSaoi#Wfy#&} ze3_^s?K-|>tT<7@s7m9*`HK58QAN@4W~8Eds4_JQqQ{mXm>EHd3Px3CVv#Y+X2j#_ z6G6*JnG@m4w50Kbu~eUR!dgHWVq7b~+e29OLt>I_An zbpnqjiPzS;7uY-g*iDUTEtFJ;JF)j(#dE2nH7QEa?XZC)fBP!O1l;7>K9vtA(xa7^-5pp}*kX zQ~EC#@Sij|sDE%!-Ti6xSAAw5I?w)npHuhf-~2gxWo-6}J#kNAcUa`U0q7&k_#A%Dr&+IBxo*qG$$$I<9?Pl6 z`84CzY54giobXrwrprowkG{0m7r^{SorH@y_<8*=zj$1uuQ@U{vTA(B%--+MICjbL z$Jlh|$8=?ihKPpciH8?|6AsPFsbrQBd+y)Tt-ox z0p{rX86+z+TggDAl$ld<6#v`mKl@MV!rd18q<&1p9SuSZ{?nX$pE_^pnsc!5mTB5s za2ZEiLzOb-E&VD%{1e`C2(y*9;G&H4PkGB{;|1-s>^D%B%LwZU(y{v0q*vozk z1k=uOyJdRW95$5drK*?7fAeWR_p;f{S91Tt5B8^$n|k}UW7}_{1XJgebIf%`_@(;0 zWU8n3Z&st&{<@yKVlef0Ld+y@|2&KuD5+|D&Y5Gofrt4WVD5r(CB)QA1_@c&hvu*g zd6)+PbDF`7Ur&*ht6tL#>gOEPa+bxEi8)MGRz9@TU*_;1o%8CdJZD!2)vTvGM4KIN z&HaG(d5spiqa&;2z_(Su;&1G^K05=5yBWlW`yCVq!g**5?~r&6)tvK%C>|EzvH`1m zD^C<&3w{fTyBCOhRT*o->UbH8{z(katDvlHI~JXNXxyS|-q=IqslCMV3y@MjRj*yf zL;uu4lQCG?fD*r*`sGHr=?O&7q}6LVH*VAK!lr*#V4`E>hMMZA8QbnDQT4tizif15 z+f3j1bBS06=_OqC8fIq5y(FHu?=3;A`p@jdyzzCk@eni4n z_ngFt{x#r!m2vqziet%7F-Io)hgxxb&BrolUplH=Zq)g@kH?2$I;`XD#5(TFsN**Z zOnq*rv$|#0-`|utdQMcQf+U=LXv;ejbA@`d`BnriZp^Bv-3eR%5U4XUsP>qWtZ?Oi zQG`(Nw#$>V4|F zQM``V#+miT0R^J>d2`uhP8V&bSLSB@?=hl0Ue{R*W=5Quj3&|^n_Ux>NnGyTPjE6v*f0k5&R`!|93Tl?X6bDr$+?2NB!2zB<2HP28rdM=1 zi0VX+Ib<*)Zs&36p>&+He;(TyNc)1=wN8lBwk2^x^PRtK8G2?bi zw8&qRlE774Fc)>78{&$C)1MXUybx8K%I*j?VPh3_KUAQqv!&+9cSmR$e?5%R-1FoT zu5$m#`Rh*@Po7rq2yISghB~}JQ-8Ni{Oz8O5OXmM^<_UkEJ5pkmXLf@P?Gm+cB5r~m{LLK4e 
zta^;VjReH2TQ##1IM&AsCzI-UnRU)_5>Nlt(~kRo=B&u^0*zn)?fsjMS}!-=oDjj} z%RLgb>UfijsVo(kQ}#xlS%dsk;QepJ{CcB!okopvE7oNKvAAO;o3C=O$jukaLp1*j zVA5Tj$+E3zRb~V@QDT>u!#b+#oW$IyPW+rC@tm7y5>i&Tof_s=Stq-Q;YCLYT)ACy zb^Vz{=A+&n&;D}~k$-iLG6RWUSxS**`|qc zM$M0n7-npFUfr@8&pAz^CKl+GYnhoL*GRn70=;?-Gw*eS5;3}9u3EqD#CzS3(27G5 zwCsYpdR;TEIqaavm&{dbn>+0^;@~Bh%+>3d@t{!$Q+x`dqcvo%mZv+2$;EQ@T4ubq zX))DwUw+0yRSei} zd}Rzh{@pg|=#?#y>sMEC8FWmUKLRJ=s@6D%b~U5vsPa^x<@P47VKi~6J-s}tTF+dc zy0!&!$74xmg>apNo%+sRXJcn~;*I8D^r@|F;dg9-#0;tK-d^vPApDA;bnoiDDX}(G zUmCfgK(?{>DViDQChYwaVBW}>?i+LN3y2-ne{RC})VGmts#2ED$JWIAiTc#j&H0je zA8(WfsX7kKO~gdiCxLEhvlM|TDtqPJ#4LL#N^xtv6#Oko^X6YJH{7A2dPx%V?L3s^ zwtPwStdJ^5)pnVeh;tr}a@?LTNAdZ+sVYnEP2!GtS@@fq9kxbpe!Me}>i%5_k-mf**Nh=tIp)TP~h^ZAH8gzg1(iD(!Z!t^WR$Uq{g!oUiB5!_$7@kKIATe zDvzbPQ≤q`*Qm&uV_rY)RG^e@2rlAIZJ(g>{lh8{KS1nc)9y| zuaM@RU%_2S>B@V#y!NAI@$G_il9zD@ms#`ONYNM|+7^ud29&h?d-5VbM@ literal 48602 zcmeHQX^>r2wO#~NM2rz|Ld1Y5gAgEO0(nRV0)Y@>5(X8M+;rchFS*^fxjm2s2w|SW z6ox>+Fi&BgAq3?4EXuMd%AzQXqAY*BAO5f`%d-684@%#+hjq@{W8cGV-~01YwbFgo zUVD9euf6tg*53Q{K_}MQJ$)zbb@XKS-?7t=n>AzB)TvWvxcp=62

&NqK6MN^vHokik|N^^x7gKMOQp#=qA`l z(av8RdT1w+qPLocuKfp*qT69pMOU0)=q98qdT61cH>VLP+PU1&HK?be+s-%iGHkEt z(q|3ba3YbSjr$wAVF#W;SAJsX=2wXnJv!OYRefkj&@BfWx(>f9x_z6WtLGCby0vZS zWz<>Gs~;P>@+#;BZG?VBw@xs0^-kmi-TFsE*G@!zL0e8TbQ$=HHt%8RGU!)ywH_t48rpaR$_2Xd07GwVfL>7j zLqqrMLi>Z(K)0ehR~dR^PsoAR9%1O>zY;0hfVwMM|E!^lz|(XIeuCDmG_+we;jgP{J84Go~|ikcG*4V;E& zQ0p*5mn51?^Q0qQJBPf%i#%~P`{~q|Dq0N>)!B5Z~6AisIiAd248x6gP_S1A7euA!F zX6TMhkO#d2IYsrAhF*Ic{SWl`4nvQjT#6q2wV?+V5h=PHbx`!6rOS~=(RCjiy8d0@ zfL_{SXzTAV-hl2#UPU`jGxRLv6+H#tQ}i6_rs$b-3_br>j0>PAjxqEkY^vyqord}* zVO&x4H$(TKPKwrjU}zA(D|+}LL-$^T@dxzgeTMEw8z{Q=5<^>_M!y2xH^I=)iAd8` z_z8OEBSXD^CQ`I%rJ>Cm(f>fZ)){*I5ZDj&*hEA3qdbZp_>G}Qwqx7_J-txTiWLja zTzuA&Wh>^LvUKi}1&f!RwPeNUNONs%;HYD1#fn8|FI*T(&}A%Q1eTq4_M$UZ%$vPz zHnKGO2Um=)o!hS0hU*IkhK6ecJ#`8gEH|${G~8~j7lJ;AB~KX`>94n;baCJMp+--w zIj=t2=&27ucQT3do?0JvbInKTM%p@aIxU^M)$XqiFK)L6>+Ru2LW?hx)V83%wx&Mc z7av_46f7>7)SEA#NKI<^(MWxyKEKxAXs#FHk&?v)lX}mr_qW<(s0?I;Hue@2s%Lg@ zZyPQEV+CB1!}y9T@9|jLc&q;U4-7S01CBL25({)1 z>@hT^g#vwC#)Y-@^>)5|zDS_U!1`jMpT-4P58BcQXty$rD;X@Olb4RH>Te7~NIp3d z=hhm-3woPs5YjTmGfNrW1$%U@$6&eH&1S0y1CVh|sPh;sH@{skWyAxgpII9$RFuDS z>e&PRB3(YQ#Qf&SP#-c5)_aYfCAEPydK5~faLxq-qqSzESJfewJ)6uriqIwLvKBeQ+I3YZH9h1~g$AKG zRFF#a^DiB&wTEo$C$uC*gf4fsGC6b+I+-VO;*6ntg0j33=Dr~~ENRBW`E@ysn4dE}vM!W@QoQk!4U*oi1xC87#Nd%E4g8ubieW$SC50 z(@ojX%9v+cttM3X9G0|2l%fHPWl{1%4xl4V?q30ya~M?9Ig3KrN0za0g^>HqQc}}7 zjrv8NHqByLM}rj(c`R)->+V%3HBT&I^RkHZ$TF~vPRE4Qt`D_F+C6p5fUOZbE+*1A zwCIh`$5w=$vd*OKLGUD{Xc#dzE59nq7ZfpVBEZ!6}c73v)>=P=3^HwNp?#z1{; zt*5W-Yc7{_SPVC@0CCxz2ex$*=GBI4XsRfi^PE|4uc3MQ`;Ca8a+dt zRpm;hxH&Nu4vygc#2TZ_7$k5$&lW)PIV|bgxp1r~oAY?OicE1n=Q1jn*7^sVb+xi~ zpIOT4D8)Pm%bDCr>}fX!hg)q7WJNjBX`I_Cjc3m!a(*Ad8463Sn$39vwKh|n&$;vv z>}j;~oX6!Hym%~=VH@Q$i3HB47L-Ar2$SW{9#reSAV-wVd5qoWw`=|NReG^j_<`iH zG<2(Xc$rio|(!v9v9hOezW{nJlmULnc_~kSvyE2e4^oUX-5UC2&6F1YXkU z+6a^7=hW9U2AmAAQ-sO#rwsJ^(gBNQWqpf@J;G%9x&6Ir?a5=O@;8qoB+*gSlsMRG ztH~9DlHtlN?P&}Rwe&cq#rWBkT8O#p+(z$kU!kt#xRUeggTsAu8a3^-T4Tv^CFeBZ 
z;B#9#0;c=5DW}JF^B7&Nk%3-czu~#kOA$@zX*AI$ci_>rV&-gD7AqB+Zi zXuc(NyutA+fibKpOqn3^sg?}}Altqk-^;ZCW5l|`e{2@X^yJS$K#nt56Dn|*E%MB3 z*YrZNzgPd}rD&jfud%8UU`8m1*UcX5j8@QaLdp8NdN1^Z8Jq-}uQ4=O8}6~bIgg(r z5}sNsS#U*;xvjzV(yZdylhTVyV*w>(9)--DCZa1Uf@x&RMwdb~)t+p(+$MofOAGZC^&laZnxn~a#V-3xb z3pl(kgXD@F7*AWx(Yio!I6@KwB)=@0VaPlQ2|YFo8SyNn)XTOOvYksj3n?DO=hWKm zhFL`R7ehNUg}k0~^ed!wA~i9YXCZ~TM#vOTLPD3hcoj9eJf4u&L)@}PKUObT>>6p0 zC#03L7OJ7tzWb^cilvC0ixI8!<^h)_WVI#4!p2O4${d0zPxa_vu<5_ zQEjlNuhtka)r~M&o?FMw5aV(VG)SRdk2PiWBYe&^yGI9Ml|$-GreHBH=Ro%wsoQEA zX`jQAfs3eFu~`#aN{LpF%8jYDSsOsPc(-p1)uFJ^&-hBnI(@y@GE1wDDHUo|4iqV( z)fA?(2ID9~%+xN*<~*>zETP|p%r<~+5LHoMfb}>ai;YRdFp^Dnbs#j=uJ;zsXf0_J z#M0c{=Qak8o9c{UCa2HmT!Hs#LF>4+VeIea$FrO`V+5OmSi-8ELkzFUq#&<5)}GpK zjSQ;cyigELXDvZ3dCz<*RAds9`AheVYLLlE9sc%5R8uIF)Taj+e}_A&G$EPPZ1${V zr6N`fg_8O*n>x|*X62J8YCpL>P_t{}vh=YgPs)y|z1Udk3?L>xCWhW0>P){c%DUB? zqj^a%?WF7+@}#t$=v-lk#j#->yS#M9$~1WMNU=Pi+~E1uTap z?W|zp+duAE)q^*@fze`g%_g%BUM;1R}Yz!Hp&21H(FI=ZYmIw*S zPjMGuI^PBO6;75OfzzYs;q>XPJwzYjbm^8`h~B%K==~>&ez}9_-KU6_KTCAZb3`BF zRO+@zak?F+M*pyh=%1U3KDY~XC(^ePEyM4BN7|IvAqV>V9YpV}#%a@SI05@A(cZg= z=G_DSD@1!j?twMX)kn0~y+nsSOmyHQIMMn%(W2LIV)Y7?=O&_GK}EM5m*EQ&7KuplyjaM_JxOSw4$;-T++(Kz0LUKdj?qF6uu8^`0=m+ma*zZ2% z-39({!T&w-Z${cDpow=7O?n9@@n1mwUc^aj(6Z}s0vY^`sP9VD@olunH`m;Y^1g{W+z;EKt)AY3_JfQ%HhmT7Bk1~bFUkg=-H7&E zw~Ocyly4%+_W|L?g70uBA3TPeU|7ExM2f=_Wfv%wIXcJvc8)-GwX$?I` zmr*bEP#;}D4O&I(>2kV;X3#A9G0mq%w2*eum*`#;ehM8*%jgt3f&PQ`pxJa99Y^QT z>GTR6Nk63Rw1w`cnRGn;gwCQf=`ET>N7G?+HvNE3qR-H&G=~<@G&+~gpjYWAI+<>w z+v)4Hg7%^P=*zS(JxCAHXX#t?Gg?CX)4}v2y++T{>+}YFnva5|P=q66q{T1wyl{}%W^ zZ-L!$ivQEx3gFo;yZG`G)q1VSXb;#ryUasDa=GBrCno&dX0k4FWj3_^m$@=e@aD%L zDvO?yeH52JwNQoEGqJtFYM3j6@-hj^5|vaL6_nT)^)mA2i&Fw*N$!Ehx9rp|S7O`A z-xd=E&`J4Su5#y5<}0|oL+B-Ho69PxT^G$P_6NNjcT3{)R~X?nUPM9SVT`m0L|u49keJ8xQKG6 zcC+KVu&KRFeKX5$eizCG(>ZNwUWH{0#?68h_spZ4T#jPS89V<4TN&S+l}%;$-qC$A z)ykz)=*ozzm_&5N`FU(oTuxxbpD zx2}3li`q8QGc|r}O412@*3-qXTtATJ2^Ht?J-$4d93%x>I-Z2w!E}>SJ2y5S<2k>> 
zbu4dFV(xE~@WV!t90w0lLk|x`5-wiIIX-T=u8Ufx>!ZvRhdJ3;Hp%ru$&48z@I#w{ zovrXskMBCDm*{z@O$}YtCPaMHrua_kvQu8lQh}RtLd;J&tvveR*|pe@l;e+yNvjcC z-$~;vnAe?EZ(+q^v(98xn=bwp!cvF5A!|foQ|BgMAjU1(kxmxwSSxQ>2C!}<% zA|svB+gz<$9unN(alP?i#ms0=PH zi7IjTk2Sp8q3ya$o4MS7^&6v;JYK`Zv$*>jqQql}XO(~Yt?kraGCG1=Acm;4xn1lJ zC+o=)PV>6R{P@dH-6>sWUoxXKyRjHFSSR46Oa@jDh5nh7 zbcLeg8TF>aM-7|?$z;kwnWc{$bhF*c&OgRY^dYUJACEm%xvpyE#lfqj>Q0Vdsn(Zw z8uf)kp%IM|4=y32S)BSekVNKUU8oz$u|r2U@NswFczMyDxm%6ee^kuSv~iI~^KBQ9 zt)}_*s|s9b%?sA7I=~ZOwaO&t#YiWV%b_sMeWTIhdQ2@>fOwU1g;HliV&6{abRRt; zSC&9o0#0npg&TX`QCGXy?6Z}X%jvdPX*;c2S{98T8y+SG|LVkdI%LfHyq0eWiNH)OSUo;f3Cm}&w7!m zQl}0{6_4e+ni=V`p;pdSw3V)t9#WO3m?sglVqO?h<&>h=>b&~Nu@}F>6zbWp<6kZC ztl{{vMsNp1&En`27by`OEs~3PQ^ApeCcz9sp+%Yuf-=(&e->AA3a3K5t1H{)*;>i+ zT(`2!(od}W1bpc~L1N%Qg@pAW`^?T@AK4rD(QyaMrs^&vgebIcmb3Fc%| zi*w0ByYVd!2l|I*D+9$!-JoJh1L(5b2To3UF~ z#8DFj=XCkG$GOepc9Bd#PV4AIx-e^8*bTyEv=)L1g&Y+#-K+ErV=Wh)Wp$HvCR&WI zd2tS+#ZWoTHjZ1QAOIFIg)b9Tr2WE|j1?y;7*%O}IA3vJCaNeJHj7j=4^^f{LG(}( z1j!;OQNgInOpGgL*^GExeLNTzDRLMFOAA?Jt6h@veyHnoVdyKOE*JDN;z77evteF0 z(51QY5SLDh1h+=lX2V+eC|42CihrQXez;Qvu`EY~uiOuS`k(rAzqV5>gY_G#yWrKV za{9N+UCrgyseaI-j#?RD;mZkrAZm223_;;3{1nsJ*E|%48*oa0eVISYJf*kUR5t|1 z`U?Tmj-O@!R=~==dav1?+WoKobRted?}fe-JeE(_9Axq5@PGsX39He&solKMF zOi|wytsOFz6ZXR|?}Pr^GIqz$lj)dutlSLC#%W{x10Kt%$CGLDI}`A8Uz{pd|Mr%Z zI+XUE^f@qptCMh)8$Y)$-{;ImPrEhLT0K1ZuqB62UbtY<88+Sdu{WMnNfhC9z5Z;5`mw@WCTMTjsY|EI826T5lT`dKyyZA% zDsRD&eCMC?mVb{IbkeflLRpd@Ki^AwN+-Q+>a4Jr{Q(H3o#S@P^s?z}DAh|HhH)jt)T>mWw9q9t(Uy5`26 z{R(8U(cKkVZf*6I5RGqcNpm{gv!jtUwnn0oSX)H@HIVCCxfwE(L;03eMjE87uf zV&*s$zVmg783tY?Y<15`jOgD4?l%~hPb)Z*uYhq;f-`~eFCnLJ5TW-|(rjJ*i znCJ>~T4GK0HQ4ij0ux6!y0s<$F-OmqYAyUNiKFL4b!J3XOFneuor&J3-ZBo1pv8?@ z88tDUs@3ARJyiSITv^G={y8nv7bY`ad6uRttm&DR=|KW(+*WRx+3?^3&p!qvt5~)5 za&yvm7*U=vl3f*W`%zQ;~_-iA9<6YmAy1m z$L|YFah^`XRkvxT6^{(?>}kF35Hr2vCxgLLvz$*?(c7--0v3@xWJ^omU7WrES61Zv$=HjJOLtJsb^WTMfT!<O*<_QAN*@BmVmAxg`?|B1tUEiz)Mq5^5umQIbL*Zt0((5xJvpqo0?Qr(m(5t& 
zV{@bWJc%Vwn0JSmiSwNz5F^(ay28y_b-uujgiotmHM0^p)yE1alj?Yxbw5xvFJzZ$9TstjLnNDpqc6|Cxi8S~6Ef z%++qWM2sw5t77GLEK0|u9dwqw*z03c)WP+b{x;FTUW*KB8%n9 zHcgB(>Wo@X3^TS^u5Q`HxH=iAy%IICT&`Tp%nVtVc&X)b^%`d0vQ|sP=z_Uw{bnT2 zQXd6-u8E*!7tGb`nrY2G2R*)Iu3FpNJ7vScOD>tK*D>Qk7dV*WPI*^r$XqSgI*7@| za`jqfytZjE)pV~m3iO9JWz#vMUq|O;GJeyq(Zb92(xei+6TF#;+Nldy0~XKu_Gk14 zem5n^#e7?_5(Jm~yFs6su(!J6GU%f7jp*^WKw_nL49aj}ybR-NgWOJPJC0v`J)s-h z=SJuu7uUU)%AaPC;FbGzuGYgj^ltBiLUCKTX&BdTzrYbzMPsZ!ES4s zc_Y54KrCNQyT9KvEwNhMA3bSZff&0OQgz&%k(m3w52W=P37?`cRy1(#r#!l9$y_9G z!&ul|-cQN(*^3$5X;QN|F1|w2UClEiN5GxgaT2`3*qY2DoW^%W5`uRB+^XKpOHvcM}hp~c5Fy51AcjiFa%u>qlrtZ>BT|Sdgl7njbk8pJ(gr<=gkgw`qNe2 zwDF9@%KBJ%^-Ue&cWr@0zf$-2Zgxu$esxP$z4CaHi;Ub-Aln%5IL(YR6ZZZAYI`eV zx^Hf|FNSqh|CtHjQ{Otdtx8$CA6pY|J?hgfTk<6-zG2q=mn3E;-cQsgK5p-@6oDx! zd*#f;EPFgkaYwuq{OvXK?o=)}++|huk|gHaSt!Y!`I6{aAyts7bh)=-28ZV9@!hBE7v(OW~n*io&<8M-Cr9{IY4z!On88b zxNhsL1YO!-W@d!CH(mn0Rtdiv*p2tiOzh9hgdOgyPzui&mD?k?YWR7)9Oj+&f5juV zcUA(=2vvJUZnn5TUV`|y8l^*2Z;8a5rDAFiq)O2mtha|7buUy|9Xrt{)Y<3<$HMN` z`Uz~cVt6Qyts@QLM=YiJEBipEKR#Tc6rM3Ew?uA+d?a3u%m+2yiK?niW+m41|ACfy zv~pRxwM=frd@NC##HT~0qf~C8+@9g%*>ac}#C&d47NKgpT3wbU2$o@UJA#0;*Ro4=k`$}$iwN1=UYG!;+qvpTCE5T*; zJE`%sMC*P7*w1Th@qu3nRJl!a@sk$}QoX&V&ZpY+DYI_Z9JyHkiyBi;^YY`w-PJKM zo2jp%yj0@JPak)Mm%B0aa*1dEBAE1z>R$%Roy~ov#EKu%sqQU_KB3NfzpBynhb^4V zbf>#J51sUc+_}=%0>to`x3q3Yc!{2;#?{wLymF^Egsg1U-0uAwCFXdXcTS9?>YG(N zOC%ln>9&_oOZdvyf%T@w!nucWs8en<*`;xUkBI- Date: Wed, 2 Dec 2020 18:10:42 -0800 Subject: [PATCH 25/48] Validation layer can now be enabled through the vk_validation cvar. 
--- src/refresh/vkpt/main.c | 112 +++++++++++++++++++++++++------------ src/refresh/vkpt/vk_util.h | 26 +++------ src/refresh/vkpt/vkpt.h | 1 + 3 files changed, 85 insertions(+), 54 deletions(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 9a50cc346..747275646 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -71,6 +71,7 @@ static int drs_effective_scale = 0; cvar_t *cvar_min_driver_version = NULL; cvar_t *cvar_nv_ray_tracing = NULL; +cvar_t *cvar_vk_validation = NULL; extern uiStatic_t uis; @@ -396,11 +397,9 @@ LIST_EXTENSIONS_DEBUG LIST_EXTENSIONS_INSTANCE #undef VK_EXTENSION_DO -#ifdef VKPT_ENABLE_VALIDATION -const char *vk_requested_layers[] = { +const char *vk_validation_layers[] = { "VK_LAYER_KHRONOS_validation" }; -#endif const char *vk_requested_instance_extensions[] = { VK_EXT_DEBUG_UTILS_EXTENSION_NAME, @@ -410,30 +409,28 @@ const char *vk_requested_instance_extensions[] = { #endif }; - -const char *vk_requested_device_extensions_nv[] = { - VK_NV_RAY_TRACING_EXTENSION_NAME, +const char *vk_requested_device_extensions_common[] = { VK_KHR_SWAPCHAIN_EXTENSION_NAME, VK_EXT_DESCRIPTOR_INDEXING_EXTENSION_NAME, VK_EXT_SAMPLER_FILTER_MINMAX_EXTENSION_NAME, -#ifdef VKPT_ENABLE_VALIDATION - VK_EXT_DEBUG_MARKER_EXTENSION_NAME, -#endif #ifdef VKPT_DEVICE_GROUPS VK_KHR_DEVICE_GROUP_EXTENSION_NAME, VK_KHR_BIND_MEMORY_2_EXTENSION_NAME, #endif }; +const char *vk_requested_device_extensions_nv[] = { + VK_NV_RAY_TRACING_EXTENSION_NAME, +}; + const char *vk_requested_device_extensions_khr[] = { VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME, - VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, - VK_KHR_SWAPCHAIN_EXTENSION_NAME, -#ifdef VKPT_ENABLE_VALIDATION +}; + +const char *vk_requested_device_extensions_debug[] = { VK_EXT_DEBUG_MARKER_EXTENSION_NAME, -#endif }; static const VkApplicationInfo vk_app_info = { @@ -469,17 +466,6 @@ get_vk_layer_list( 
_VK(vkEnumerateInstanceLayerProperties(num_layers, *ext)); } -int -layer_requested(const char *name) -{ -#ifdef VKPT_ENABLE_VALIDATION - for (int i = 0; i < LENGTH(vk_requested_layers); i++) - if(!strcmp(name, vk_requested_layers[i])) - return 1; -#endif - return 0; -} - static VKAPI_ATTR VkBool32 VKAPI_CALL vk_debug_callback( VkDebugUtilsMessageSeverityFlagBitsEXT severity, @@ -759,6 +745,15 @@ create_command_pool_and_fences() return VK_SUCCESS; } +static void +append_string_list(const char** dst, uint32_t* dst_count, uint32_t dst_capacity, const char** src, uint32_t src_count) +{ + assert(*dst_count + src_count <= dst_capacity); + dst += *dst_count; + memcpy((void*)dst, src, sizeof(char*) * src_count); + *dst_count += src_count; +} + qboolean init_vulkan() { @@ -768,8 +763,7 @@ init_vulkan() get_vk_layer_list(&qvk.num_layers, &qvk.layers); Com_Printf("Available Vulkan layers: \n"); for(int i = 0; i < qvk.num_layers; i++) { - int requested = layer_requested(qvk.layers[i].layerName); - Com_Printf(" %s%s\n", qvk.layers[i].layerName, requested ? 
" (requested)" : ""); + Com_Printf(" %s\n", qvk.layers[i].layerName); } /* instance extensions */ @@ -812,15 +806,32 @@ init_vulkan() VkInstanceCreateInfo inst_create_info = { .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, .pApplicationInfo = &vk_app_info, -#ifdef VKPT_ENABLE_VALIDATION - .enabledLayerCount = LENGTH(vk_requested_layers), - .ppEnabledLayerNames = vk_requested_layers, -#endif .enabledExtensionCount = num_inst_ext_combined, .ppEnabledExtensionNames = (const char * const*)ext, }; + qvk.enable_validation = qfalse; + + if (cvar_vk_validation->integer) + { + inst_create_info.ppEnabledLayerNames = vk_validation_layers; + inst_create_info.enabledLayerCount = LENGTH(vk_validation_layers); + qvk.enable_validation = qtrue; + } + VkResult result = vkCreateInstance(&inst_create_info, NULL, &qvk.instance); + + if (result == VK_ERROR_LAYER_NOT_PRESENT) + { + Com_WPrintf("Vulkan validation layer is requested through cvar %s but is not available.\n", cvar_vk_validation->name); + + // Try again, this time without the validation layer + + inst_create_info.enabledLayerCount = 0; + result = vkCreateInstance(&inst_create_info, NULL, &qvk.instance); + qvk.enable_validation = qfalse; + } + if (result != VK_SUCCESS) { Com_Error(ERR_FATAL, "Failed to initialize a Vulkan instance.\nError code: %s", qvk_result_to_string(result)); @@ -1163,19 +1174,38 @@ init_vulkan() .queueCreateInfoCount = num_create_queues }; + uint32_t max_extension_count = LENGTH(vk_requested_device_extensions_common); + max_extension_count += (LENGTH(vk_requested_device_extensions_khr), LENGTH(vk_requested_device_extensions_nv)); + max_extension_count += LENGTH(vk_requested_device_extensions_debug); + + const char** device_extensions = alloca(sizeof(char*) * max_extension_count); + uint32_t device_extension_count = 0; + + append_string_list(device_extensions, &device_extension_count, max_extension_count, + vk_requested_device_extensions_common, LENGTH(vk_requested_device_extensions_common)); + 
if(qvk.use_khr_ray_tracing) { - dev_create_info.enabledExtensionCount = LENGTH(vk_requested_device_extensions_khr); - dev_create_info.ppEnabledExtensionNames = vk_requested_device_extensions_khr; + append_string_list(device_extensions, &device_extension_count, max_extension_count, + vk_requested_device_extensions_khr, LENGTH(vk_requested_device_extensions_khr)); device_features.pNext = &physical_device_address_features; } else { - dev_create_info.enabledExtensionCount = LENGTH(vk_requested_device_extensions_nv); - dev_create_info.ppEnabledExtensionNames = vk_requested_device_extensions_nv; + append_string_list(device_extensions, &device_extension_count, max_extension_count, + vk_requested_device_extensions_nv, LENGTH(vk_requested_device_extensions_nv)); device_features.pNext = &idx_features; } + if (qvk.enable_validation) + { + append_string_list(device_extensions, &device_extension_count, max_extension_count, + vk_requested_device_extensions_debug, LENGTH(vk_requested_device_extensions_debug)); + } + + dev_create_info.enabledExtensionCount = device_extension_count; + dev_create_info.ppEnabledExtensionNames = device_extensions; + /* create device and queue */ result = vkCreateDevice(qvk.physical_device, &dev_create_info, NULL, &qvk.device); if (result != VK_SUCCESS) @@ -1201,11 +1231,10 @@ init_vulkan() LIST_EXTENSIONS_NV } -#ifdef VKPT_ENABLE_VALIDATION + if(qvk.enable_validation) { LIST_EXTENSIONS_DEBUG } -#endif #undef VK_EXTENSION_DO @@ -1355,6 +1384,14 @@ destroy_vulkan() qvk.layers = NULL; qvk.num_layers = 0; + // Clear the extension function pointers to make sure they don't refer non-requested extensions after vid_restart +#define VK_EXTENSION_DO(a) q##a = NULL; + LIST_EXTENSIONS_KHR + LIST_EXTENSIONS_NV + LIST_EXTENSIONS_DEBUG + LIST_EXTENSIONS_INSTANCE +#undef VK_EXTENSION_DO + return 0; } @@ -3120,6 +3157,9 @@ R_Init_RTX(qboolean total) // When nonzero, the game will pick NV_ray_tracing if both NV and KHR extensions are available cvar_nv_ray_tracing = 
Cvar_Get("nv_ray_tracing", "0", CVAR_REFRESH | CVAR_ARCHIVE); + + // When nonzero, the Vulkan validation layer is requested + cvar_vk_validation = Cvar_Get("vk_validation", "0", CVAR_REFRESH | CVAR_ARCHIVE); InitialiseSkyCVars(); diff --git a/src/refresh/vkpt/vk_util.h b/src/refresh/vkpt/vk_util.h index f7bdc32d5..2eafbbc3b 100644 --- a/src/refresh/vkpt/vk_util.h +++ b/src/refresh/vkpt/vk_util.h @@ -94,11 +94,8 @@ uint32_t get_memory_type(uint32_t mem_req_type_bits, VkMemoryPropertyFlags mem_p const char *qvk_format_to_string(VkFormat format); const char *qvk_result_to_string(VkResult result); -// #define VKPT_ENABLE_VALIDATION - -#ifdef VKPT_ENABLE_VALIDATION #define ATTACH_LABEL_VARIABLE(a, type) \ - do { \ + if(qvkDebugMarkerSetObjectNameEXT) { \ /*Com_Printf("attaching object label 0x%08lx %s\n", (uint64_t) a, #a);*/ \ VkDebugMarkerObjectNameInfoEXT name_info = { \ .sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT, \ @@ -107,10 +104,10 @@ const char *qvk_result_to_string(VkResult result); .pObjectName = #a \ }; \ qvkDebugMarkerSetObjectNameEXT(qvk.device, &name_info); \ - } while(0) + } #define ATTACH_LABEL_VARIABLE_NAME(a, type, name) \ - do { \ + if(qvkDebugMarkerSetObjectNameEXT) { \ /*Com_Printf("attaching object label 0x%08lx %s\n", (uint64_t) a, name);*/ \ VkDebugMarkerObjectNameInfoEXT name_info = { \ .sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT, \ @@ -119,29 +116,22 @@ const char *qvk_result_to_string(VkResult result); .pObjectName = name, \ }; \ qvkDebugMarkerSetObjectNameEXT(qvk.device, &name_info); \ - } while(0) + } #define BEGIN_CMD_LABEL(cmd_buf, label) \ - do { \ + if(qvkCmdBeginDebugUtilsLabelEXT) { \ VkDebugUtilsLabelEXT label_info; \ label_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT; \ label_info.pNext = NULL; \ label_info.pLabelName = label; \ label_info.color[0] = label_info.color[1] = label_info.color[2] = label_info.color[3] = 1.0f; \ qvkCmdBeginDebugUtilsLabelEXT(cmd_buf, &label_info); \ - } while (0) + 
} #define END_CMD_LABEL(cmd_buf) \ - do { \ + if(qvkCmdEndDebugUtilsLabelEXT) { \ qvkCmdEndDebugUtilsLabelEXT(cmd_buf); \ - } while (0) - -#else -#define ATTACH_LABEL_VARIABLE(a, type) do{}while(0) -#define ATTACH_LABEL_VARIABLE_NAME(a, type, name) do{}while(0) -#define BEGIN_CMD_LABEL(cmd_buf, label) do{}while(0) -#define END_CMD_LABEL(cmd_buf) do{}while(0) -#endif + } static inline size_t align(size_t x, size_t alignment) { diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 32723ce0e..22f654974 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -189,6 +189,7 @@ typedef struct QVK_s { VkImageView swap_chain_image_views[MAX_SWAPCHAIN_IMAGES]; qboolean use_khr_ray_tracing; + qboolean enable_validation; cmd_buf_group_t cmd_buffers_graphics; cmd_buf_group_t cmd_buffers_compute; From 2a659a372e100f2ff5e7f7c1566529840adf5e3c Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 2 Dec 2020 18:12:25 -0800 Subject: [PATCH 26/48] Updated SDL2 to make it compatible with gcc10. --- extern/SDL2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extern/SDL2 b/extern/SDL2 index 59b174ed8..fd196c446 160000 --- a/extern/SDL2 +++ b/extern/SDL2 @@ -1 +1 @@ -Subproject commit 59b174ed8a324f9cb22222b23b1d6a67e99ab3af +Subproject commit fd196c446a476282dd457fbafe9e9b3feae3f370 From b5c5e171a3f6d21245911293e5cf6541fe9017eb Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 2 Dec 2020 18:27:50 -0800 Subject: [PATCH 27/48] Added "Unicode True" to the setup scripts for compatibility with newer versions of NSIS. 
--- setup/setup.nsi | 1 + setup/steam_setup.nsi | 1 + 2 files changed, 2 insertions(+) diff --git a/setup/setup.nsi b/setup/setup.nsi index 32741fb33..a5c301a4a 100644 --- a/setup/setup.nsi +++ b/setup/setup.nsi @@ -1,5 +1,6 @@ RequestExecutionLevel user +Unicode True !include "nsDialogs.nsh" !include "MUI2.nsh" diff --git a/setup/steam_setup.nsi b/setup/steam_setup.nsi index 2f874a3ac..e1a12b8c5 100644 --- a/setup/steam_setup.nsi +++ b/setup/steam_setup.nsi @@ -1,5 +1,6 @@ RequestExecutionLevel user +Unicode True !include "nsDialogs.nsh" !include "MUI2.nsh" From 043bc136c2d3ef05eb1dc749a9fc76d736425960 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Wed, 2 Dec 2020 18:31:58 -0800 Subject: [PATCH 28/48] Fixed the default value for flt_taa. --- src/refresh/vkpt/shader/global_ubo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index e2cfaa341..1dc559293 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -58,7 +58,7 @@ with this program; if not, write to the Free Software Foundation, Inc., UBO_CVAR_DO(flt_scale_overlay, 1.0) /* scale for transparent and emissive objects visible with primary rays */ \ UBO_CVAR_DO(flt_scale_spec, 1) \ UBO_CVAR_DO(flt_show_gradients, 0) /* switch for showing the gradient values as overlay image, 0 or 1 */ \ - UBO_CVAR_DO(flt_taa, AA_MODE_UPSCALE) /* temporal anti-aliasing mode: 0 = off, 1 = regular TAA, 2 = temporal upscale */ \ + UBO_CVAR_DO(flt_taa, 2) /* temporal anti-aliasing mode: 0 = off, 1 = regular TAA, 2 = temporal upscale */ \ UBO_CVAR_DO(flt_taa_anti_sparkle, 0.25) /* strength of the anti-sparkle filter of TAA, [0..1] */ \ UBO_CVAR_DO(flt_taa_variance, 1.0) /* temporal AA variance window scale, 0 means disable NCC, [0..inf) */ \ UBO_CVAR_DO(flt_taa_history_weight, 0.95) /* temporal AA weight of the history sample, [0..1) */ \ From 6679d9ed08df6fab24d20e726bedcc50c482e0bb Mon Sep 17 
00:00:00 2001 From: Alexey Panteleev Date: Thu, 3 Dec 2020 07:32:37 -0800 Subject: [PATCH 29/48] Fixed a buffer overflow in extension init. --- src/refresh/vkpt/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 747275646..5fff2cec5 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -1175,7 +1175,7 @@ init_vulkan() }; uint32_t max_extension_count = LENGTH(vk_requested_device_extensions_common); - max_extension_count += (LENGTH(vk_requested_device_extensions_khr), LENGTH(vk_requested_device_extensions_nv)); + max_extension_count += max(LENGTH(vk_requested_device_extensions_khr), LENGTH(vk_requested_device_extensions_nv)); max_extension_count += LENGTH(vk_requested_device_extensions_debug); const char** device_extensions = alloca(sizeof(char*) * max_extension_count); From 37892f90b987cc4efc9b2fd544de5f95dc8c8476 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 3 Dec 2020 09:46:27 -0800 Subject: [PATCH 30/48] Added KHR_deferred_host_operations extension, which is required for KHR_acceleration_structure. --- src/refresh/vkpt/main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 5fff2cec5..97b7f6bca 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -427,6 +427,7 @@ const char *vk_requested_device_extensions_khr[] = { VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, VK_KHR_PIPELINE_LIBRARY_EXTENSION_NAME, + VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME }; const char *vk_requested_device_extensions_debug[] = { From 5a64117ed36d793068890639912eaaf28dfb2676 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 3 Dec 2020 20:11:44 -0800 Subject: [PATCH 31/48] Reduced the set of requested device features. 
--- src/refresh/vkpt/main.c | 66 ++++++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 97b7f6bca..29e65f72a 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -1115,20 +1115,20 @@ init_vulkan() .fullDrawIndexUint32 = 1, .imageCubeArray = 1, .independentBlend = 1, - .geometryShader = 1, - .tessellationShader = 1, + .geometryShader = 0, + .tessellationShader = 0, .sampleRateShading = 0, - .dualSrcBlend = 1, - .logicOp = 1, - .multiDrawIndirect = 1, - .drawIndirectFirstInstance = 1, - .depthClamp = 1, - .depthBiasClamp = 1, + .dualSrcBlend = 0, + .logicOp = 0, + .multiDrawIndirect = 0, + .drawIndirectFirstInstance = 0, + .depthClamp = 0, + .depthBiasClamp = 0, .fillModeNonSolid = 0, - .depthBounds = 1, + .depthBounds = 0, .wideLines = 0, .largePoints = 0, - .alphaToOne = 1, + .alphaToOne = 0, .multiViewport = 0, .samplerAnisotropy = 1, .textureCompressionETC2 = 0, @@ -1136,36 +1136,36 @@ init_vulkan() .textureCompressionBC = 0, .occlusionQueryPrecise = 0, .pipelineStatisticsQuery = 1, - .vertexPipelineStoresAndAtomics = 1, - .fragmentStoresAndAtomics = 1, - .shaderTessellationAndGeometryPointSize = 1, - .shaderImageGatherExtended = 1, + .vertexPipelineStoresAndAtomics = 0, + .fragmentStoresAndAtomics = 0, + .shaderTessellationAndGeometryPointSize = 0, + .shaderImageGatherExtended = 0, .shaderStorageImageExtendedFormats = 1, - .shaderStorageImageMultisample = 1, - .shaderStorageImageReadWithoutFormat = 1, - .shaderStorageImageWriteWithoutFormat = 1, + .shaderStorageImageMultisample = 0, + .shaderStorageImageReadWithoutFormat = 0, + .shaderStorageImageWriteWithoutFormat = 0, .shaderUniformBufferArrayDynamicIndexing = 1, .shaderSampledImageArrayDynamicIndexing = 1, .shaderStorageBufferArrayDynamicIndexing = 1, .shaderStorageImageArrayDynamicIndexing = 1, - .shaderClipDistance = 1, - .shaderCullDistance = 1, - .shaderFloat64 = 1, - .shaderInt64 = 
1, - .shaderInt16 = 1, - .shaderResourceResidency = 1, - .shaderResourceMinLod = 1, + .shaderClipDistance = 0, + .shaderCullDistance = 0, + .shaderFloat64 = 0, + .shaderInt64 = 0, + .shaderInt16 = 0, + .shaderResourceResidency = 0, + .shaderResourceMinLod = 0, .sparseBinding = 1, - .sparseResidencyBuffer = 1, - .sparseResidencyImage2D = 1, - .sparseResidencyImage3D = 1, - .sparseResidency2Samples = 1, - .sparseResidency4Samples = 1, - .sparseResidency8Samples = 1, - .sparseResidency16Samples = 1, - .sparseResidencyAliased = 1, + .sparseResidencyBuffer = 0, + .sparseResidencyImage2D = 0, + .sparseResidencyImage3D = 0, + .sparseResidency2Samples = 0, + .sparseResidency4Samples = 0, + .sparseResidency8Samples = 0, + .sparseResidency16Samples = 0, + .sparseResidencyAliased = 0, .variableMultisampleRate = 0, - .inheritedQueries = 1, + .inheritedQueries = 0, } }; VkDeviceCreateInfo dev_create_info = { From 4177261fc983d6de9f3e52d15695a84ace826962 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 3 Dec 2020 22:18:36 -0800 Subject: [PATCH 32/48] Increased the sizes of descriptor pools to actually fit the descriptor sets. --- src/refresh/vkpt/path_tracer.c | 2 +- src/refresh/vkpt/textures.c | 2 +- src/refresh/vkpt/vertex_buffer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index 8e110e6f3..e7bc82923 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -239,7 +239,7 @@ vkpt_pt_init() ATTACH_LABEL_VARIABLE(rt_pipeline_layout, PIPELINE_LAYOUT); VkDescriptorPoolSize pool_sizes[] = { - { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_FRAMES_IN_FLIGHT }, + { VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, MAX_FRAMES_IN_FLIGHT * LENGTH(bindings) }, { qvk.use_khr_ray_tracing ? 
VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR : VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_NV, MAX_FRAMES_IN_FLIGHT } }; diff --git a/src/refresh/vkpt/textures.c b/src/refresh/vkpt/textures.c index 0f4169871..f866029bd 100644 --- a/src/refresh/vkpt/textures.c +++ b/src/refresh/vkpt/textures.c @@ -987,7 +987,7 @@ vkpt_textures_initialize() ATTACH_LABEL_VARIABLE(qvk.desc_set_layout_textures, DESCRIPTOR_SET_LAYOUT); VkDescriptorPoolSize pool_size = { .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, - .descriptorCount = MAX_RIMAGES + 2 * NUM_VKPT_IMAGES + 128, + .descriptorCount = 2 * (MAX_RIMAGES + 2 * NUM_VKPT_IMAGES) + 128, }; VkDescriptorPoolCreateInfo pool_info = { diff --git a/src/refresh/vkpt/vertex_buffer.c b/src/refresh/vkpt/vertex_buffer.c index e377c025f..fe66bc83d 100644 --- a/src/refresh/vkpt/vertex_buffer.c +++ b/src/refresh/vkpt/vertex_buffer.c @@ -707,7 +707,7 @@ vkpt_vertex_buffer_create() VkDescriptorPoolSize pool_size = { .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = LENGTH(vbo_layout_bindings) + MAX_MODELS, + .descriptorCount = LENGTH(vbo_layout_bindings) + MAX_MODELS + 128, }; VkDescriptorPoolCreateInfo pool_info = { From 6e198fb3e6c38e64d710a793ff744cd19ab1139b Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 4 Dec 2020 10:10:10 -0800 Subject: [PATCH 33/48] Replaced the global RT pipeline with multiple pipelines, with a single RGEN in each one. 
--- src/refresh/vkpt/path_tracer.c | 831 ++++++++++++---------------- src/refresh/vkpt/shader/constants.h | 25 +- 2 files changed, 352 insertions(+), 504 deletions(-) diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index e7bc82923..8bddc16ee 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -55,6 +55,18 @@ typedef struct { qboolean present; } blas_t; +typedef enum { + PIPELINE_PRIMARY_RAYS, + PIPELINE_REFLECT_REFRACT_1, + PIPELINE_REFLECT_REFRACT_2, + PIPELINE_DIRECT_LIGHTING, + PIPELINE_DIRECT_LIGHTING_CAUSTICS, + PIPELINE_INDIRECT_LIGHTING_FIRST, + PIPELINE_INDIRECT_LIGHTING_SECOND, + + PIPELINE_COUNT +} pipeline_index_t; + static BufferResource_t buf_accel_scratch; static size_t scratch_buf_ptr = 0; static BufferResource_t buf_instances[MAX_FRAMES_IN_FLIGHT]; @@ -89,7 +101,7 @@ static VkDescriptorPool rt_descriptor_pool; static VkDescriptorSet rt_descriptor_set[MAX_FRAMES_IN_FLIGHT]; static VkDescriptorSetLayout rt_descriptor_set_layout; static VkPipelineLayout rt_pipeline_layout; -static VkPipeline rt_pipeline; +static VkPipeline rt_pipelines[PIPELINE_COUNT]; cvar_t* cvar_pt_enable_particles = NULL; cvar_t* cvar_pt_enable_beams = NULL; @@ -1014,9 +1026,9 @@ vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world ); \ } while(0) -static void setup_rt_pipeline(VkCommandBuffer cmd_buf) +static void setup_rt_pipeline(VkCommandBuffer cmd_buf, pipeline_index_t index) { - vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline); + vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipelines[index]); vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rt_pipeline_layout, 0, 1, rt_descriptor_set + qvk.current_frame_index, 0, 0); @@ -1032,6 +1044,55 @@ static void setup_rt_pipeline(VkCommandBuffer cmd_buf) rt_pipeline_layout, 3, 1, &qvk.desc_set_vertex_buffer, 0, 0); } +static void +dispatch_rays(VkCommandBuffer cmd_buf, 
pipeline_index_t pipeline_index, pt_push_constants_t push, uint32_t width, uint32_t height, uint32_t depth) +{ + setup_rt_pipeline(cmd_buf, pipeline_index); + + vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); + + uint32_t sbt_offset = SBT_ENTRIES_PER_PIPELINE * pipeline_index * shaderGroupBaseAlignment; + + if (qvk.use_khr_ray_tracing) + { + assert(buf_shader_binding_table.address); + + VkStridedDeviceAddressRegionKHR raygen = { + .deviceAddress = buf_shader_binding_table.address + sbt_offset, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR miss_and_hit = { + .deviceAddress = buf_shader_binding_table.address + sbt_offset, + .stride = shaderGroupBaseAlignment, + .size = shaderGroupBaseAlignment + }; + + VkStridedDeviceAddressRegionKHR callable = { + .deviceAddress = VK_NULL_HANDLE, + .stride = 0, + .size = 0 + }; + + qvkCmdTraceRaysKHR(cmd_buf, + &raygen, + &miss_and_hit, + &miss_and_hit, + &callable, + width, height, depth); + } + else // (!qvk.use_khr_ray_tracing) + { + qvkCmdTraceRaysNV(cmd_buf, + buf_shader_binding_table.buffer, sbt_offset, + buf_shader_binding_table.buffer, sbt_offset, shaderGroupBaseAlignment, + buf_shader_binding_table.buffer, sbt_offset, shaderGroupBaseAlignment, + VK_NULL_HANDLE, 0, 0, + width, height, depth); + } +} + VkResult vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) { @@ -1045,8 +1106,6 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) .size = VK_WHOLE_SIZE, ); - setup_rt_pipeline(cmd_buf); - BEGIN_PERF_MARKER(cmd_buf, PROFILER_PRIMARY_RAYS); for(int i = 0; i < qvk.device_count; i++) @@ -1056,46 +1115,8 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) pt_push_constants_t push; push.gpu_index = qvk.device_count == 1 ? 
-1 : i; push.bounce = 0; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); - - if (qvk.use_khr_ray_tracing) - { - assert(buf_shader_binding_table.address); - - VkStridedDeviceAddressRegionKHR raygen = { - .deviceAddress = buf_shader_binding_table.address + SBT_RGEN_PRIMARY_RAYS * shaderGroupBaseAlignment, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - VkStridedDeviceAddressRegionKHR miss_and_hit = { - .deviceAddress = buf_shader_binding_table.address, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR callable = { - .deviceAddress = VK_NULL_HANDLE, - .stride = 0, - .size = 0 - }; - - qvkCmdTraceRaysKHR(cmd_buf, - &raygen, - &miss_and_hit, - &miss_and_hit, - &callable, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); - } - else // (!qvk.use_khr_ray_tracing) - { - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, SBT_RGEN_PRIMARY_RAYS * shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); - } + dispatch_rays(cmd_buf, PIPELINE_PRIMARY_RAYS, push, qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); } set_current_gpu(cmd_buf, ALL_GPUS); @@ -1125,57 +1146,17 @@ vkpt_pt_trace_reflections(VkCommandBuffer cmd_buf, int bounce) { int frame_idx = qvk.frame_counter & 1; - setup_rt_pipeline(cmd_buf); - for (int i = 0; i < qvk.device_count; i++) - { - set_current_gpu(cmd_buf, i); - - pt_push_constants_t push; - push.gpu_index = qvk.device_count == 1 ? 
-1 : i; - push.bounce = bounce; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); + { + set_current_gpu(cmd_buf, i); - int shader = (bounce == 0) ? SBT_RGEN_REFLECT_REFRACT1 : SBT_RGEN_REFLECT_REFRACT2; + pipeline_index_t pipeline = (bounce == 0) ? PIPELINE_REFLECT_REFRACT_1 : PIPELINE_REFLECT_REFRACT_2; - if (qvk.use_khr_ray_tracing) - { - assert(buf_shader_binding_table.address); - - VkStridedDeviceAddressRegionKHR raygen = { - .deviceAddress = buf_shader_binding_table.address + shader * shaderGroupBaseAlignment, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR miss_and_hit = { - .deviceAddress = buf_shader_binding_table.address, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR callable = { - .deviceAddress = VK_NULL_HANDLE, - .stride = 0, - .size = 0 - }; + pt_push_constants_t push; + push.gpu_index = qvk.device_count == 1 ? -1 : i; + push.bounce = bounce; - qvkCmdTraceRaysKHR(cmd_buf, - &raygen, - &miss_and_hit, - &miss_and_hit, - &callable, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); - } - else // (!qvk.use_khr_ray_tracing) - { - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, shader * shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); - } + dispatch_rays(cmd_buf, pipeline, push, qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 
2 : 1); } set_current_gpu(cmd_buf, ALL_GPUS); @@ -1200,61 +1181,19 @@ vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays) { int frame_idx = qvk.frame_counter & 1; - setup_rt_pipeline(cmd_buf); - BEGIN_PERF_MARKER(cmd_buf, PROFILER_DIRECT_LIGHTING); for (int i = 0; i < qvk.device_count; i++) { set_current_gpu(cmd_buf, i); + pipeline_index_t pipeline = (cvar_pt_caustics->value != 0) ? PIPELINE_DIRECT_LIGHTING_CAUSTICS : PIPELINE_DIRECT_LIGHTING; + pt_push_constants_t push; push.gpu_index = qvk.device_count == 1 ? -1 : i; push.bounce = 0; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); - int rgen_index = SBT_RGEN_DIRECT_LIGHTING; - if (cvar_pt_caustics->value != 0) - rgen_index = SBT_RGEN_DIRECT_LIGHTING_CAUSTICS; - - if (qvk.use_khr_ray_tracing) - { - assert(buf_shader_binding_table.address); - - VkStridedDeviceAddressRegionKHR raygen = { - .deviceAddress = buf_shader_binding_table.address + rgen_index * shaderGroupBaseAlignment, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR miss_and_hit = { - .deviceAddress = buf_shader_binding_table.address, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR callable = { - .deviceAddress = VK_NULL_HANDLE, - .stride = 0, - .size = 0 - }; - - qvkCmdTraceRaysKHR(cmd_buf, - &raygen, - &miss_and_hit, - &miss_and_hit, - &callable, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); - } - else // (!qvk.use_khr_ray_tracing) - { - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, rgen_index * shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 
2 : 1); - } + dispatch_rays(cmd_buf, pipeline, push, qvk.extent_render.width / 2, qvk.extent_render.height, qvk.device_count == 1 ? 2 : 1); } set_current_gpu(cmd_buf, ALL_GPUS); @@ -1282,61 +1221,21 @@ vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays) { set_current_gpu(cmd_buf, i); - pt_push_constants_t push; - push.gpu_index = qvk.device_count == 1 ? -1 : i; - push.bounce = 0; - vkCmdPushConstants(cmd_buf, rt_pipeline_layout, VK_SHADER_STAGE_RAYGEN_BIT_KHR, 0, sizeof(push), &push); - + int height; + if (num_bounce_rays == 0.5f) + height = qvk.extent_render.height / 2; + else + height = qvk.extent_render.height; + for (int bounce_ray = 0; bounce_ray < (int)ceilf(num_bounce_rays); bounce_ray++) - { - int height; - if (num_bounce_rays == 0.5f) - height = qvk.extent_render.height / 2; - else - height = qvk.extent_render.height; - - int rgen_index = (bounce_ray == 0) - ? SBT_RGEN_INDIRECT_LIGHTING_FIRST - : SBT_RGEN_INDIRECT_LIGHTING_SECOND; - - if (qvk.use_khr_ray_tracing) - { - assert(buf_shader_binding_table.address); - - VkStridedDeviceAddressRegionKHR raygen = { - .deviceAddress = buf_shader_binding_table.address + rgen_index * shaderGroupBaseAlignment, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR miss_and_hit = { - .deviceAddress = buf_shader_binding_table.address, - .stride = shaderGroupBaseAlignment, - .size = shaderGroupBaseAlignment - }; - - VkStridedDeviceAddressRegionKHR callable = { - .deviceAddress = VK_NULL_HANDLE, - .stride = 0, - .size = 0 - }; - - qvkCmdTraceRaysKHR(cmd_buf, - &raygen, - &miss_and_hit, - &miss_and_hit, - &callable, - qvk.extent_render.width / 2, height, qvk.device_count == 1 ? 
2 : 1); - } - else // (!qvk.use_khr_ray_tracing) - { - qvkCmdTraceRaysNV(cmd_buf, - buf_shader_binding_table.buffer, rgen_index * shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - buf_shader_binding_table.buffer, 0, shaderGroupBaseAlignment, - VK_NULL_HANDLE, 0, 0, - qvk.extent_render.width / 2, height, qvk.device_count == 1 ? 2 : 1); - } + { + pipeline_index_t pipeline = (bounce_ray == 0) ? PIPELINE_INDIRECT_LIGHTING_FIRST : PIPELINE_INDIRECT_LIGHTING_SECOND; + + pt_push_constants_t push; + push.gpu_index = qvk.device_count == 1 ? -1 : i; + push.bounce = 0; + + dispatch_rays(cmd_buf, pipeline, push, qvk.extent_render.width / 2, height, qvk.device_count == 1 ? 2 : 1); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_COLOR_LF_SH]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_COLOR_LF_COCG]); @@ -1399,17 +1298,15 @@ vkpt_pt_create_pipelines() } }; - uint32_t num_shader_groups = 0; - char* shader_handles = NULL; + uint32_t num_shader_groups = SBT_ENTRIES_PER_PIPELINE * PIPELINE_COUNT; + char* shader_handles = alloca(num_shader_groups * shaderGroupHandleSize); VkPipelineShaderStageCreateInfo shader_stages[] = { - SHADER_STAGE(QVK_MOD_PRIMARY_RAYS_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR), - SHADER_STAGE_SPEC(QVK_MOD_REFLECT_REFRACT_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[0]), - SHADER_STAGE_SPEC(QVK_MOD_REFLECT_REFRACT_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[1]), - SHADER_STAGE_SPEC(QVK_MOD_DIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[0]), - SHADER_STAGE_SPEC(QVK_MOD_DIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[1]), - SHADER_STAGE_SPEC(QVK_MOD_INDIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[0]), - SHADER_STAGE_SPEC(QVK_MOD_INDIRECT_LIGHTING_RGEN, VK_SHADER_STAGE_RAYGEN_BIT_KHR, &specInfo[1]), + { + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_RAYGEN_BIT_KHR, + .pName = "main" + }, 
SHADER_STAGE(QVK_MOD_PATH_TRACER_RMISS, VK_SHADER_STAGE_MISS_BIT_KHR), SHADER_STAGE(QVK_MOD_PATH_TRACER_SHADOW_RMISS, VK_SHADER_STAGE_MISS_BIT_KHR), SHADER_STAGE(QVK_MOD_PATH_TRACER_RCHIT, VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR), @@ -1419,309 +1316,260 @@ vkpt_pt_create_pipelines() SHADER_STAGE(QVK_MOD_PATH_TRACER_SPRITE_RAHIT, VK_SHADER_STAGE_ANY_HIT_BIT_KHR), }; - if (qvk.use_khr_ray_tracing) + for (pipeline_index_t index = 0; index < PIPELINE_COUNT; index++) { - VkRayTracingShaderGroupCreateInfoKHR rt_shader_group_info[] = { - [SBT_RGEN_PRIMARY_RAYS] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 0, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RGEN_REFLECT_REFRACT1] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 1, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RGEN_REFLECT_REFRACT2] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 2, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RGEN_DIRECT_LIGHTING] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 3, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RGEN_DIRECT_LIGHTING_CAUSTICS] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 4, - 
.closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RGEN_INDIRECT_LIGHTING_FIRST] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 5, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RGEN_INDIRECT_LIGHTING_SECOND] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 6, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RMISS_PATH_TRACER] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 7, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RMISS_SHADOW] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, - .generalShader = 8, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RCHIT_OPAQUE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, - .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = 9, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RAHIT_PARTICLE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, - .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 10, - 
.intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RAHIT_BEAM] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, - .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 11, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RAHIT_EXPLOSION] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, - .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 12, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RAHIT_SPRITE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, - .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = 13, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - [SBT_RCHIT_EMPTY] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, - .generalShader = VK_SHADER_UNUSED_KHR, - .closestHitShader = VK_SHADER_UNUSED_KHR, - .anyHitShader = VK_SHADER_UNUSED_KHR, - .intersectionShader = VK_SHADER_UNUSED_KHR - }, - }; + switch (index) + { + case PIPELINE_PRIMARY_RAYS: + shader_stages[0].module = qvk.shader_modules[QVK_MOD_PRIMARY_RAYS_RGEN]; + shader_stages[0].pSpecializationInfo = NULL; + break; + case PIPELINE_REFLECT_REFRACT_1: + shader_stages[0].module = qvk.shader_modules[QVK_MOD_REFLECT_REFRACT_RGEN]; + shader_stages[0].pSpecializationInfo = &specInfo[0]; + break; + case PIPELINE_REFLECT_REFRACT_2: + shader_stages[0].module = qvk.shader_modules[QVK_MOD_REFLECT_REFRACT_RGEN]; + shader_stages[0].pSpecializationInfo = &specInfo[1]; + break; + case PIPELINE_DIRECT_LIGHTING: + shader_stages[0].module = 
qvk.shader_modules[QVK_MOD_DIRECT_LIGHTING_RGEN]; + shader_stages[0].pSpecializationInfo = &specInfo[0]; + break; + case PIPELINE_DIRECT_LIGHTING_CAUSTICS: + shader_stages[0].module = qvk.shader_modules[QVK_MOD_DIRECT_LIGHTING_RGEN]; + shader_stages[0].pSpecializationInfo = &specInfo[1]; + break; + case PIPELINE_INDIRECT_LIGHTING_FIRST: + shader_stages[0].module = qvk.shader_modules[QVK_MOD_INDIRECT_LIGHTING_RGEN]; + shader_stages[0].pSpecializationInfo = &specInfo[0]; + break; + case PIPELINE_INDIRECT_LIGHTING_SECOND: + shader_stages[0].module = qvk.shader_modules[QVK_MOD_INDIRECT_LIGHTING_RGEN]; + shader_stages[0].pSpecializationInfo = &specInfo[1]; + break; + default: + assert(!"invalid pipeline index"); + break; + } - VkPipelineLibraryCreateInfoKHR library_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR }; - VkRayTracingPipelineCreateInfoKHR rt_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, - .pNext = NULL, - .flags = 0, - .stageCount = LENGTH(shader_stages), - .pStages = shader_stages, - .groupCount = LENGTH(rt_shader_group_info), - .pGroups = rt_shader_group_info, - .maxPipelineRayRecursionDepth = 1, - .pLibraryInfo = &library_info, - .pLibraryInterface = NULL, - .pDynamicState = NULL, - .layout = rt_pipeline_layout, - .basePipelineHandle = rt_pipeline, - .basePipelineIndex = 0 - }; + if (qvk.use_khr_ray_tracing) + { + VkRayTracingShaderGroupCreateInfoKHR rt_shader_group_info[] = { + [SBT_RGEN] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 0, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RMISS_PATH_TRACER] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 1, + .closestHitShader = VK_SHADER_UNUSED_KHR, + 
.anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RMISS_SHADOW] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR, + .generalShader = 2, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RCHIT_OPAQUE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = 3, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_PARTICLE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 4, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_BEAM] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 5, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_EXPLOSION] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 6, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + [SBT_RAHIT_SPRITE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = 7, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, 
+ [SBT_RCHIT_EMPTY] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR, + .generalShader = VK_SHADER_UNUSED_KHR, + .closestHitShader = VK_SHADER_UNUSED_KHR, + .anyHitShader = VK_SHADER_UNUSED_KHR, + .intersectionShader = VK_SHADER_UNUSED_KHR + }, + }; - _VK(qvkCreateRayTracingPipelinesKHR(qvk.device, VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &rt_pipeline_info, NULL, &rt_pipeline)); + VkPipelineLibraryCreateInfoKHR library_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_LIBRARY_CREATE_INFO_KHR }; + VkRayTracingPipelineCreateInfoKHR rt_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR, + .pNext = NULL, + .flags = 0, + .stageCount = LENGTH(shader_stages), + .pStages = shader_stages, + .groupCount = LENGTH(rt_shader_group_info), + .pGroups = rt_shader_group_info, + .maxPipelineRayRecursionDepth = 1, + .pLibraryInfo = &library_info, + .pLibraryInterface = NULL, + .pDynamicState = NULL, + .layout = rt_pipeline_layout, + .basePipelineHandle = VK_NULL_HANDLE, + .basePipelineIndex = 0 + }; - num_shader_groups = LENGTH(rt_shader_group_info); + assert(LENGTH(rt_shader_group_info) == SBT_ENTRIES_PER_PIPELINE); - // get the shader handles in a dense array from VK - uint32_t shader_handle_array_size = num_shader_groups * shaderGroupHandleSize; - shader_handles = alloca(shader_handle_array_size); - _VK(qvkGetRayTracingShaderGroupHandlesKHR(qvk.device, rt_pipeline, 0, num_shader_groups, - shader_handle_array_size, shader_handles)); + VkResult res = qvkCreateRayTracingPipelinesKHR(qvk.device, VK_NULL_HANDLE, VK_NULL_HANDLE, 1, &rt_pipeline_info, NULL, &rt_pipelines[index]); + + if (res != VK_SUCCESS) + { + Com_EPrintf("Failed to create ray tracing pipeline #%d, vkCreateRayTracingPipelinesKHR error code is %s\n", index, qvk_result_to_string(res)); + return res; + } - } - else // (!qvk.use_khr_ray_tracing) - { - VkRayTracingShaderGroupCreateInfoNV rt_shader_group_info[] 
= + _VK(qvkGetRayTracingShaderGroupHandlesKHR( + qvk.device, rt_pipelines[index], 0, SBT_ENTRIES_PER_PIPELINE, + /* dataSize = */ SBT_ENTRIES_PER_PIPELINE * shaderGroupHandleSize, + /* pData = */ shader_handles + SBT_ENTRIES_PER_PIPELINE * shaderGroupHandleSize * index)); + } + else // (!qvk.use_khr_ray_tracing) { - [SBT_RGEN_PRIMARY_RAYS] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 0, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_REFLECT_REFRACT1] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 1, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_REFLECT_REFRACT2] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 2, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_DIRECT_LIGHTING] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 3, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_DIRECT_LIGHTING_CAUSTICS] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 4, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_INDIRECT_LIGHTING_FIRST] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = 
VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 5, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RGEN_INDIRECT_LIGHTING_SECOND] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 6, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RMISS_PATH_TRACER] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 7, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RMISS_SHADOW] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, - .generalShader = 8, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RCHIT_OPAQUE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = 9, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_PARTICLE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 10, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_BEAM] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = 
VK_SHADER_UNUSED_NV, - .anyHitShader = 11, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_EXPLOSION] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 12, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RAHIT_SPRITE] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = 13, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - [SBT_RCHIT_EMPTY] = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, - .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, - .generalShader = VK_SHADER_UNUSED_NV, - .closestHitShader = VK_SHADER_UNUSED_NV, - .anyHitShader = VK_SHADER_UNUSED_NV, - .intersectionShader = VK_SHADER_UNUSED_NV - }, - }; + VkRayTracingShaderGroupCreateInfoNV rt_shader_group_info[] = + { + [SBT_RGEN] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 0, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RMISS_PATH_TRACER] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 1, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RMISS_SHADOW] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_NV, + .generalShader = 2, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + 
.intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RCHIT_OPAQUE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = 3, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_PARTICLE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 4, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_BEAM] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 5, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_EXPLOSION] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 6, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RAHIT_SPRITE] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = 7, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + [SBT_RCHIT_EMPTY] = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_NV, + .type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_NV, + .generalShader = VK_SHADER_UNUSED_NV, + .closestHitShader = VK_SHADER_UNUSED_NV, + .anyHitShader = VK_SHADER_UNUSED_NV, + .intersectionShader = VK_SHADER_UNUSED_NV + }, + }; - VkRayTracingPipelineCreateInfoNV 
rt_pipeline_info = { - .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV, - .stageCount = LENGTH(shader_stages), - .pStages = shader_stages, - .groupCount = LENGTH(rt_shader_group_info), - .pGroups = rt_shader_group_info, - .layout = rt_pipeline_layout, - .maxRecursionDepth = 1, - }; + VkRayTracingPipelineCreateInfoNV rt_pipeline_info = { + .sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_NV, + .stageCount = LENGTH(shader_stages), + .pStages = shader_stages, + .groupCount = LENGTH(rt_shader_group_info), + .pGroups = rt_shader_group_info, + .layout = rt_pipeline_layout, + .maxRecursionDepth = 1, + }; - _VK(qvkCreateRayTracingPipelinesNV(qvk.device, NULL, 1, &rt_pipeline_info, NULL, &rt_pipeline)); + assert(LENGTH(rt_shader_group_info) == SBT_ENTRIES_PER_PIPELINE); - num_shader_groups = LENGTH(rt_shader_group_info); + VkResult res = qvkCreateRayTracingPipelinesNV(qvk.device, NULL, 1, &rt_pipeline_info, NULL, &rt_pipelines[index]); - // get the shader handles in a dense array from VK - uint32_t shader_handle_array_size = num_shader_groups * shaderGroupHandleSize; - shader_handles = alloca(shader_handle_array_size); - _VK(qvkGetRayTracingShaderGroupHandlesNV(qvk.device, rt_pipeline, 0, num_shader_groups, - shader_handle_array_size, shader_handles)); + if (res != VK_SUCCESS) + { + Com_EPrintf("Failed to create ray tracing pipeline #%d, vkCreateRayTracingPipelinesNV error code is %s\n", index, qvk_result_to_string(res)); + return res; + } + + _VK(qvkGetRayTracingShaderGroupHandlesNV( + qvk.device, rt_pipelines[index], 0, SBT_ENTRIES_PER_PIPELINE, + /* dataSize = */ SBT_ENTRIES_PER_PIPELINE* shaderGroupHandleSize, + /* pData = */ shader_handles + SBT_ENTRIES_PER_PIPELINE * shaderGroupHandleSize * index)); + } } // create the SBT buffer - uint32_t shader_binding_table_size = shaderGroupBaseAlignment * num_shader_groups; + uint32_t shader_binding_table_size = num_shader_groups * shaderGroupBaseAlignment; _VK(buffer_create(&buf_shader_binding_table, 
shader_binding_table_size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_SHADER_BINDING_TABLE_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)); @@ -1746,7 +1594,12 @@ VkResult vkpt_pt_destroy_pipelines() { buffer_destroy(&buf_shader_binding_table); - vkDestroyPipeline(qvk.device, rt_pipeline, NULL); + + for (pipeline_index_t index = 0; index < PIPELINE_COUNT; index++) + { + vkDestroyPipeline(qvk.device, rt_pipelines[index], NULL); + rt_pipelines[index] = VK_NULL_HANDLE; + } return VK_SUCCESS; } diff --git a/src/refresh/vkpt/shader/constants.h b/src/refresh/vkpt/shader/constants.h index ea1026d13..546961446 100644 --- a/src/refresh/vkpt/shader/constants.h +++ b/src/refresh/vkpt/shader/constants.h @@ -119,20 +119,15 @@ with this program; if not, write to the Free Software Foundation, Inc., #define AS_INSTANCE_FLAG_SKY (1 << 22) #define AS_INSTANCE_MASK_OFFSET (AS_INSTANCE_FLAG_SKY - 1) -#define SBT_RGEN_PRIMARY_RAYS 0 -#define SBT_RGEN_REFLECT_REFRACT1 1 -#define SBT_RGEN_REFLECT_REFRACT2 2 -#define SBT_RGEN_DIRECT_LIGHTING 3 -#define SBT_RGEN_DIRECT_LIGHTING_CAUSTICS 4 -#define SBT_RGEN_INDIRECT_LIGHTING_FIRST 5 -#define SBT_RGEN_INDIRECT_LIGHTING_SECOND 6 -#define SBT_RMISS_PATH_TRACER 7 -#define SBT_RMISS_SHADOW 8 -#define SBT_RCHIT_OPAQUE 9 -#define SBT_RAHIT_PARTICLE 10 -#define SBT_RAHIT_BEAM 11 -#define SBT_RAHIT_EXPLOSION 12 -#define SBT_RAHIT_SPRITE 13 -#define SBT_RCHIT_EMPTY 14 +#define SBT_RGEN 0 +#define SBT_RMISS_PATH_TRACER 1 +#define SBT_RMISS_SHADOW 2 +#define SBT_RCHIT_OPAQUE 3 +#define SBT_RAHIT_PARTICLE 4 +#define SBT_RAHIT_BEAM 5 +#define SBT_RAHIT_EXPLOSION 6 +#define SBT_RAHIT_SPRITE 7 +#define SBT_RCHIT_EMPTY 8 +#define SBT_ENTRIES_PER_PIPELINE 9 #endif /*_CONSTANTS_H_*/ From 6ca0b2c148f6395bcbe30ac59d784fe7d4b277b7 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 4 Dec 2020 11:16:42 -0800 Subject: [PATCH 34/48] Added a warning about the validation layer being enabled. 
--- src/refresh/vkpt/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 29e65f72a..9f93bd56d 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -832,6 +832,10 @@ init_vulkan() result = vkCreateInstance(&inst_create_info, NULL, &qvk.instance); qvk.enable_validation = qfalse; } + else if (cvar_vk_validation->integer) + { + Com_WPrintf("Vulkan validation layer is enabled, expect degraded game performance.\n"); + } if (result != VK_SUCCESS) { From 9b131060c7a2ab949398fb5c8dbba917b2951132 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 4 Dec 2020 12:09:44 -0800 Subject: [PATCH 35/48] Fixed the tone mapping curve shader - it was missing some barriers between shared memory reads and writes. Also, kept the subgroup use switch for potential later experiments. --- .../vkpt/shader/tone_mapping_curve.comp | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/refresh/vkpt/shader/tone_mapping_curve.comp b/src/refresh/vkpt/shader/tone_mapping_curve.comp index 2e348c97a..63ecdc1ec 100644 --- a/src/refresh/vkpt/shader/tone_mapping_curve.comp +++ b/src/refresh/vkpt/shader/tone_mapping_curve.comp @@ -37,8 +37,20 @@ with this program; if not, write to the Free Software Foundation, Inc., #version 460 #extension GL_GOOGLE_include_directive : enable #extension GL_EXT_nonuniform_qualifier : enable + +#define USE_SUBGROUPS 1 +#if USE_SUBGROUPS + +#define SUBGROUP_SIZE gl_SubgroupSize #extension GL_KHR_shader_subgroup_arithmetic : require +#else + +#define SUBGROUP_SIZE 1 + +#endif + + #define GLOBAL_UBO_DESC_SET_IDX 0 #include "global_ubo.h" @@ -78,10 +90,12 @@ shared float s_Shared[HISTOGRAM_BINS]; // Computes the sum of val across the entire workgroup. 
float computeSharedSum(float val, const uint linear_idx) { +#if USE_SUBGROUPS val = subgroupAdd(val); +#endif s_Shared[linear_idx] = val; barrier(); - for(uint k = 64; k >= gl_SubgroupSize; k /= 2) + for(uint k = 64; k >= SUBGROUP_SIZE; k /= 2) { if(linear_idx < k) { @@ -89,16 +103,20 @@ float computeSharedSum(float val, const uint linear_idx) } barrier(); } - return s_Shared[0]; + val = s_Shared[0]; + barrier(); + return val; } // Computes the max of val across the entire workgroup. float computeSharedMax(float val, const uint linear_idx) { +#if USE_SUBGROUPS val = subgroupMax(val); +#endif s_Shared[linear_idx] = val; barrier(); - for(uint k = 64; k >= gl_SubgroupSize; k /= 2) + for(uint k = 64; k >= SUBGROUP_SIZE; k /= 2) { if(linear_idx < k) { @@ -106,16 +124,20 @@ float computeSharedMax(float val, const uint linear_idx) } barrier(); } - return s_Shared[0]; + val = s_Shared[0]; + barrier(); + return val; } // Computes the inclusive prefix sum of val across the entire workgroup. float computePrefixSum(float val, const uint linear_idx) { +#if USE_SUBGROUPS val = subgroupInclusiveAdd(val); +#endif s_Shared[linear_idx] = val; barrier(); - for(uint k = gl_SubgroupSize; k < 128; k *= 2) + for(uint k = SUBGROUP_SIZE; k < 128; k *= 2) { uint block_idx = linear_idx/k; if((block_idx%2) == 1) @@ -124,7 +146,9 @@ float computePrefixSum(float val, const uint linear_idx) } barrier(); } - return s_Shared[linear_idx]; + val = s_Shared[linear_idx]; + barrier(); + return val; } void main() From 35ca52882ee098d77ddcef5e15116d47047fe881 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Fri, 4 Dec 2020 14:20:42 -0800 Subject: [PATCH 36/48] Fixed a few profiler issues. 
--- src/refresh/vkpt/main.c | 3 +++ src/refresh/vkpt/profiler.c | 38 +++++++++++++++++++++++++++++-------- src/refresh/vkpt/vkpt.h | 3 ++- 3 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 9f93bd56d..0eb1a56bf 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -980,6 +980,9 @@ init_vulkan() VkPhysicalDeviceProperties dev_properties; vkGetPhysicalDeviceProperties(devices[picked_device], &dev_properties); + // Store the timestamp period to get correct profiler results + qvk.timestampPeriod = dev_properties.limits.timestampPeriod; + Com_Printf("Picked physical device %d: %s\n", picked_device, dev_properties.deviceName); Com_Printf("Using %s\n", qvk.use_khr_ray_tracing ? VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME : VK_NV_RAY_TRACING_EXTENSION_NAME); diff --git a/src/refresh/vkpt/profiler.c b/src/refresh/vkpt/profiler.c index 850ac2d5d..78b4367b1 100644 --- a/src/refresh/vkpt/profiler.c +++ b/src/refresh/vkpt/profiler.c @@ -20,11 +20,16 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "vkpt.h" static VkQueryPool query_pool; -static uint64_t query_pool_results[NUM_PROFILER_QUERIES_PER_FRAME + 1]; + +static uint64_t query_pool_results[NUM_PROFILER_QUERIES_PER_FRAME * 2]; +// ^^^ +// not sure why (* 2) is necessary, looks like there is a bug in AMD drivers +// causing vkGetQueryPoolResults to stop writing the results halfway through +// the buffer if it's properly sized. 
extern cvar_t *cvar_pt_reflect_refract; -static qboolean profiler_queries_used[NUM_PROFILER_QUERIES_PER_FRAME * 2] = { 0 }; +static qboolean profiler_queries_used[NUM_PROFILER_QUERIES_PER_FRAME * MAX_FRAMES_IN_FLIGHT] = { 0 }; VkResult vkpt_profiler_initialize() @@ -32,7 +37,7 @@ vkpt_profiler_initialize() VkQueryPoolCreateInfo query_pool_info = { .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, .queryType = VK_QUERY_TYPE_TIMESTAMP, - .queryCount = MAX_FRAMES_IN_FLIGHT * NUM_PROFILER_ENTRIES * 2, + .queryCount = MAX_FRAMES_IN_FLIGHT * NUM_PROFILER_QUERIES_PER_FRAME, }; vkCreateQueryPool(qvk.device, &query_pool_info, NULL, &query_pool); return VK_SUCCESS; @@ -52,8 +57,11 @@ vkpt_profiler_query(VkCommandBuffer cmd_buf, int idx, VKPTProfilerAction action) set_current_gpu(cmd_buf, 0); - vkCmdWriteTimestamp(cmd_buf, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, - query_pool, idx); + VkPipelineStageFlagBits stage = (action == PROFILER_START) + ? VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT + : VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + + vkCmdWriteTimestamp(cmd_buf, stage, query_pool, idx); set_current_gpu(cmd_buf, ALL_GPUS); @@ -126,7 +134,12 @@ draw_query(int x, int y, qhandle_t font, const char *enum_name, int idx) R_DrawString(x, y, 0, 128, buf, font); double ms = vkpt_get_profiler_result(idx); - snprintf(buf, sizeof buf, "%8.2f ms", ms); + + if(ms > 0.0) + snprintf(buf, sizeof buf, "%8.2f ms", ms); + else + snprintf(buf, sizeof buf, " N/A"); + R_DrawString(x + 256, y, 0, 128, buf, font); } @@ -152,7 +165,10 @@ draw_profiler(int enable_asvgf) PROFILER_DO(PROFILER_PRIMARY_RAYS, 1); if (cvar_pt_reflect_refract->integer > 0) { PROFILER_DO(PROFILER_REFLECT_REFRACT_1, 1); } if (cvar_pt_reflect_refract->integer > 1) { PROFILER_DO(PROFILER_REFLECT_REFRACT_2, 1); } - PROFILER_DO(PROFILER_ASVGF_GRADIENT_REPROJECT, 1); + if (enable_asvgf) + { + PROFILER_DO(PROFILER_ASVGF_GRADIENT_REPROJECT, 1); + } PROFILER_DO(PROFILER_DIRECT_LIGHTING, 1); PROFILER_DO(PROFILER_INDIRECT_LIGHTING, 1); 
PROFILER_DO(PROFILER_GOD_RAYS, 1); @@ -181,6 +197,12 @@ draw_profiler(int enable_asvgf) double vkpt_get_profiler_result(int idx) { - double ms = (double)(query_pool_results[idx * 2 + 1] - query_pool_results[idx * 2 + 0]) * 1e-6; + uint64_t begin = query_pool_results[idx * 2 + 0]; + uint64_t end = query_pool_results[idx * 2 + 1]; + + if (begin == 0 || end == 0) + return 0.0; // one of these queries was unavailable at the time vkGetQueryPoolResults was called + + double ms = (double)(end - begin) * 1e-6 * qvk.timestampPeriod; return ms; } \ No newline at end of file diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 22f654974..7176e33d7 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -221,7 +221,8 @@ typedef struct QVK_s { uint32_t current_swap_chain_image_index; uint32_t current_frame_index; // when set, we'll do a WFI before acquire for this many frames - uint32_t wait_for_idle_frames; + uint32_t wait_for_idle_frames; + float timestampPeriod; VkShaderModule shader_modules[NUM_QVK_SHADER_MODULES]; From 4eb197a6d219d0f47d83a817251193b5db51cc35 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Mon, 7 Dec 2020 10:41:21 -0800 Subject: [PATCH 37/48] Moved the engine version print to an earlier point. --- src/common/common.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common/common.c b/src/common/common.c index 9d7df5dd0..6cdc0d372 100644 --- a/src/common/common.c +++ b/src/common/common.c @@ -1019,6 +1019,10 @@ void Qcommon_Init(int argc, char **argv) Cmd_AddCommand("recycle", Com_Recycle_f); #endif + // Print the engine version early so that it's definitely included in the console log. + // The log file is opened during the execution of one of the config files above. 
+ Com_LPrintf(PRINT_NOTICE, "\nEngine version: " APPLICATION " " VERSION_STRING ", built on " __DATE__ "\n\n"); + Netchan_Init(); NET_Init(); BSP_Init(); @@ -1054,7 +1058,6 @@ void Qcommon_Init(int argc, char **argv) Com_AddConfigFile(COM_POSTINIT_CFG, FS_TYPE_REAL); Com_Printf("====== " PRODUCT " initialized ======\n\n"); - Com_LPrintf(PRINT_NOTICE, APPLICATION " " VERSION_STRING ", " __DATE__ "\n"); if (fs_shareware->integer) { From fce4fad74e8454e22a6c871362290caeaeedb5bb Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Mon, 7 Dec 2020 11:21:22 -0800 Subject: [PATCH 38/48] Attempt to close the console log when an unhandled exception happens. --- src/windows/debug.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/windows/debug.c b/src/windows/debug.c index 075906ef3..9fa81e3c0 100644 --- a/src/windows/debug.c +++ b/src/windows/debug.c @@ -230,14 +230,14 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) #endif ); if (ret == IDNO) { - return EXCEPTION_EXECUTE_HANDLER; + goto finalize; } #define LL(x) \ do { \ moduleHandle = LoadLibrary(x); \ if (!moduleHandle) { \ - return EXCEPTION_CONTINUE_SEARCH; \ + goto finalize; \ } \ } while(0) @@ -245,7 +245,7 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) do { \ p##y = (x)GetProcAddress(moduleHandle, #y); \ if (!p##y) { \ - return EXCEPTION_CONTINUE_SEARCH; \ + goto finalize; \ } \ } while(0) @@ -270,7 +270,7 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) // get base directory to save crash dump to len = GetModuleFileName(NULL, execdir, sizeof(execdir)); if (!len || len >= sizeof(execdir)) { - return EXCEPTION_CONTINUE_SEARCH; + goto finalize; } while (--len) { @@ -280,7 +280,7 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) } if (!len || len + 24 >= MAX_PATH) { - return EXCEPTION_CONTINUE_SEARCH; + goto finalize; } execdir[len] = 0; @@ -309,7 +309,8 @@ LONG WINAPI 
Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) "Base directory is not writable.", CRASH_TITLE, MB_ICONERROR); - return EXCEPTION_EXECUTE_HANDLER; + + goto finalize; } } @@ -320,7 +321,8 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) "Please remove existing reports from base directory.", CRASH_TITLE, MB_ICONERROR); - return EXCEPTION_EXECUTE_HANDLER; + + goto finalize; } pSymSetOptions( @@ -514,6 +516,10 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) pShellExecuteA(NULL, "open", path, NULL, execdir, SW_SHOW); +finalize: + // Try to quit nicely, most importantly, try to flush and close the console log. + Com_Quit(NULL, ERR_FATAL); + return EXCEPTION_EXECUTE_HANDLER; } From d337bff74157fe84e7150ba7b899f39833453e21 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Mon, 7 Dec 2020 16:37:09 -0800 Subject: [PATCH 39/48] Improved motion vectors for multiple refraction, in particular when thick glass is enabled. --- src/refresh/vkpt/shader/reflect_refract.rgen | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/refresh/vkpt/shader/reflect_refract.rgen b/src/refresh/vkpt/shader/reflect_refract.rgen index 138e3f7b5..809df445f 100644 --- a/src/refresh/vkpt/shader/reflect_refract.rgen +++ b/src/refresh/vkpt/shader/reflect_refract.rgen @@ -381,10 +381,12 @@ main() { primary_medium = MEDIUM_NONE; } + correct_motion_vector = 2; } else { direction = reflected_direction; + correct_motion_vector = 1; } } @@ -547,7 +549,7 @@ main() material_id = triangle.material_id; cluster_idx = triangle.cluster; - if(correct_motion_vector > 0 && spec_bounce_index == 0) + if(correct_motion_vector == 1 && spec_bounce_index == 0 || correct_motion_vector == 2) { vec3 ref_pos_curr, ref_pos_prev; vec3 ref_geo_normal; From a4d5017e8fd2a14d76fabfff77a7257a35f59d53 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 8 Dec 2020 10:12:56 -0800 Subject: [PATCH 40/48] Added a second driver version check for 
KHR_ray_tracing_pipeline. --- src/refresh/vkpt/main.c | 39 +++++++++++++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 0eb1a56bf..7189f6309 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -69,7 +69,8 @@ extern cvar_t* cvar_flt_taa; static int drs_current_scale = 0; static int drs_effective_scale = 0; -cvar_t *cvar_min_driver_version = NULL; +cvar_t* cvar_min_driver_version = NULL; +cvar_t* cvar_min_driver_version_khr = NULL; cvar_t *cvar_nv_ray_tracing = NULL; cvar_t *cvar_vk_validation = NULL; @@ -994,15 +995,34 @@ init_vulkan() Com_Printf("NVIDIA GPU detected. Driver version: %u.%02u\n", driver_major, driver_minor); - uint32_t required_major = 0; - uint32_t required_minor = 0; - int nfields = sscanf(cvar_min_driver_version->string, "%u.%u", &required_major, &required_minor); - if (nfields == 2) + if (qvk.use_khr_ray_tracing) { - if (driver_major < required_major || driver_major == required_major && driver_minor < required_minor) + uint32_t required_major = 0; + uint32_t required_minor = 0; + int nfields = sscanf(cvar_min_driver_version_khr->string, "%u.%u", &required_major, &required_minor); + if (nfields == 2) { - Com_Error(ERR_FATAL, "This game requires NVIDIA Graphics Driver version to be at least %u.%02u, while the installed version is %u.%02u.\nPlease update the NVIDIA Graphics Driver.", - required_major, required_minor, driver_major, driver_minor); + if (driver_major < required_major || driver_major == required_major && driver_minor < required_minor) + { + Com_Error(ERR_FATAL, "Running Quake II RTX with KHR ray tracing extensions requires NVIDIA Graphics Driver version " + "to be at least %u.%02u, while the installed version is %u.%02u. 
Please update the NVIDIA Graphics Driver, or " + "switch to the legacy mode by adding \"+set nv_ray_tracing 1\" to the command line.", + required_major, required_minor, driver_major, driver_minor); + } + } + } + else + { + uint32_t required_major = 0; + uint32_t required_minor = 0; + int nfields = sscanf(cvar_min_driver_version->string, "%u.%u", &required_major, &required_minor); + if (nfields == 2) + { + if (driver_major < required_major || driver_major == required_major && driver_minor < required_minor) + { + Com_Error(ERR_FATAL, "This game requires NVIDIA Graphics Driver version to be at least %u.%02u, while the installed version is %u.%02u.\nPlease update the NVIDIA Graphics Driver.", + required_major, required_minor, driver_major, driver_minor); + } } } } @@ -3163,6 +3183,9 @@ R_Init_RTX(qboolean total) // and the current test no longer works. cvar_min_driver_version = Cvar_Get("min_driver_version", "430.86", 0); + // Separate min driver version for the KHR ray tracing mode + cvar_min_driver_version_khr = Cvar_Get("min_driver_version_khr", "460.82", 0); + // When nonzero, the game will pick NV_ray_tracing if both NV and KHR extensions are available cvar_nv_ray_tracing = Cvar_Get("nv_ray_tracing", "0", CVAR_REFRESH | CVAR_ARCHIVE); From 8d4aa9825de73f0af5e57700cdfc676f373d5e07 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 8 Dec 2020 10:18:01 -0800 Subject: [PATCH 41/48] Added opaque flags to the geometry that should be considered opaque for the purposes of ray tracing. Improves performance a little. 
--- src/refresh/vkpt/path_tracer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index 8bddc16ee..a475e084c 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -817,20 +817,20 @@ vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, qboolean include_world if (include_world) { - append_blas(instances, &num_instances, &blas_static, 0, AS_FLAG_OPAQUE, 0, 0); - append_blas(instances, &num_instances, &blas_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, 0, 0); - append_blas(instances, &num_instances, &blas_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, 0, 0); - append_blas(instances, &num_instances, &blas_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, AS_FLAG_CUSTOM_SKY, 0, 0); + append_blas(instances, &num_instances, &blas_static, 0, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, AS_FLAG_CUSTOM_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); } - append_blas(instances, &num_instances, &blas_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, 0, 0); - append_blas(instances, &num_instances, &blas_transparent_models[idx], AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, 0, 0); + append_blas(instances, &num_instances, &blas_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_transparent_models[idx], 
AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, 0); append_blas(instances, &num_instances, &blas_explosions[idx], AS_INSTANCE_FLAG_DYNAMIC | explosions_primitive_offset, AS_FLAG_EXPLOSIONS, 0, 3); if (cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON) { - append_blas(instances, &num_instances, &blas_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 0); - append_blas(instances, &num_instances, &blas_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, weapon_left_handed ? VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0, 0); + append_blas(instances, &num_instances, &blas_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, 0); + append_blas(instances, &num_instances, &blas_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | (weapon_left_handed ? VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0), 0); } if (cvar_pt_enable_particles->integer != 0) From 06b6f5276fb47d8baaf9739de09f28428e40886c Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 8 Dec 2020 11:18:14 -0800 Subject: [PATCH 42/48] Added branch name to the version info and included full version name into the resources. 
--- CMakeLists.txt | 9 +++++++++ inc/shared/config.h | 3 ++- src/CMakeLists.txt | 1 + src/client/console.c | 2 +- src/common/common.c | 4 ++-- src/windows/debug.c | 2 +- src/windows/res/baseq2.rc | 9 ++------- src/windows/res/q2rtx.rc | 9 ++------- src/windows/res/q2rtxded.rc | 11 +++-------- 9 files changed, 23 insertions(+), 27 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7745e8ea2..c04e5c404 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,15 @@ execute_process( OUTPUT_STRIP_TRAILING_WHITESPACE ) +# get branch name +execute_process( + COMMAND git rev-parse --abbrev-ref HEAD + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} + OUTPUT_VARIABLE Q2RTX_VERSION_BRANCH + OUTPUT_STRIP_TRAILING_WHITESPACE +) + + OPTION(CONFIG_GL_RENDERER "Enable GL renderer" ON) OPTION(CONFIG_VKPT_RENDERER "Enable VKPT renderer" ON) OPTION(CONFIG_VKPT_ENABLE_DEVICE_GROUPS "Enable device groups (multi-gpu) support" ON) diff --git a/inc/shared/config.h b/inc/shared/config.h index 13cf21eb1..241cf0d03 100644 --- a/inc/shared/config.h +++ b/inc/shared/config.h @@ -5,7 +5,8 @@ // expand to generate version string #define STRING(x) #x #define _STR(x) STRING(x) -#define VERSION_STRING "" _STR(VERSION_MAJOR) "." _STR(VERSION_MINOR) "." _STR(VERSION_POINT) "-" _STR(VERSION_SHA) +#define VERSION_STRING "" _STR(VERSION_MAJOR) "." _STR(VERSION_MINOR) "." _STR(VERSION_POINT) +#define LONG_VERSION_STRING "" _STR(VERSION_MAJOR) "." _STR(VERSION_MINOR) "." 
_STR(VERSION_POINT) "-" _STR(VERSION_BRANCH) "-" _STR(VERSION_SHA) #ifdef _WIN64 #define CPUSTRING "x86_64" diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 1415f428e..480f4653e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -356,6 +356,7 @@ ADD_DEFINITIONS(-DVERSION_MAJOR=${Q2RTX_VERSION_MAJOR}) ADD_DEFINITIONS(-DVERSION_MINOR=${Q2RTX_VERSION_MINOR}) ADD_DEFINITIONS(-DVERSION_POINT=${Q2RTX_VERSION_POINT}) ADD_DEFINITIONS(-DVERSION_SHA=${Q2RTX_VERSION_SHA}) +ADD_DEFINITIONS(-DVERSION_BRANCH=${Q2RTX_VERSION_BRANCH}) ADD_DEFINITIONS(-DHAVE_CONFIG_H=1 -DCURL_STATICLIB) diff --git a/src/client/console.c b/src/client/console.c index 9303342c6..5ac001893 100644 --- a/src/client/console.c +++ b/src/client/console.c @@ -938,7 +938,7 @@ static void Con_DrawSolidConsole(void) UI_DRAWCURSOR, con.charsetImage); } -#define APP_VERSION APPLICATION " " VERSION_STRING +#define APP_VERSION APPLICATION " " LONG_VERSION_STRING #define VER_WIDTH ((int)(sizeof(APP_VERSION) + 1) * CHAR_WIDTH) y = vislines - CON_PRESTEP + CHAR_HEIGHT; diff --git a/src/common/common.c b/src/common/common.c index 6cdc0d372..742d56fd7 100644 --- a/src/common/common.c +++ b/src/common/common.c @@ -116,7 +116,7 @@ cvar_t *rcon_password; extern cvar_t *fs_shareware; const char com_version_string[] = - APPLICATION " " VERSION_STRING " " __DATE__ " " BUILDSTRING " " CPUSTRING; + APPLICATION " " LONG_VERSION_STRING " " __DATE__ " " BUILDSTRING " " CPUSTRING; unsigned com_framenum; unsigned com_eventTime; @@ -1021,7 +1021,7 @@ void Qcommon_Init(int argc, char **argv) // Print the engine version early so that it's definitely included in the console log. // The log file is opened during the execution of one of the config files above. 
- Com_LPrintf(PRINT_NOTICE, "\nEngine version: " APPLICATION " " VERSION_STRING ", built on " __DATE__ "\n\n"); + Com_LPrintf(PRINT_NOTICE, "\nEngine version: " APPLICATION " " LONG_VERSION_STRING ", built on " __DATE__ "\n\n"); Netchan_Init(); NET_Init(); diff --git a/src/windows/debug.c b/src/windows/debug.c index 9fa81e3c0..6a04d9608 100644 --- a/src/windows/debug.c +++ b/src/windows/debug.c @@ -345,7 +345,7 @@ LONG WINAPI Sys_ExceptionFilter(LPEXCEPTION_POINTERS exceptionInfo) systemTime.wMinute, systemTime.wSecond); write_report( - "by " APPLICATION " " VERSION_STRING + "by " APPLICATION " " LONG_VERSION_STRING ", built " __DATE__", " __TIME__ "\r\n"); #pragma warning(push) diff --git a/src/windows/res/baseq2.rc b/src/windows/res/baseq2.rc index 712459b31..a930a8026 100644 --- a/src/windows/res/baseq2.rc +++ b/src/windows/res/baseq2.rc @@ -12,13 +12,8 @@ #define VER_FILEFLAGS 0x0L #endif -#ifdef _WIN64 -#define VER_FILEDESCRIPTION_STR "Q2RTX baseq2 game module (64-bit)" +#define VER_FILEDESCRIPTION_STR "Q2RTX baseq2 Game Module " LONG_VERSION_STRING #define VER_ORIGINALFILENAME_STR "gamex86_64.dll" -#else -#define VER_FILEDESCRIPTION_STR "Q2RTX baseq2 game module" -#define VER_ORIGINALFILENAME_STR "gamex86.dll" -#endif LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US @@ -40,7 +35,7 @@ BEGIN VALUE "OriginalFilename", VER_ORIGINALFILENAME_STR VALUE "FileVersion", VERSION_STRING VALUE "InternalName", "baseq2" - VALUE "LegalCopyright", "Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved." + VALUE "LegalCopyright", "Copyright (C) 2019-2020, NVIDIA CORPORATION. All rights reserved." 
VALUE "ProductName", "Quake II RTX" VALUE "ProductVersion", VERSION_STRING END diff --git a/src/windows/res/q2rtx.rc b/src/windows/res/q2rtx.rc index c9b4532e7..d6e66d902 100644 --- a/src/windows/res/q2rtx.rc +++ b/src/windows/res/q2rtx.rc @@ -12,13 +12,8 @@ #define VER_FILEFLAGS 0x0L #endif -#ifdef _WIN64 -#define VER_FILEDESCRIPTION_STR "Q2RTX client (64-bit)" +#define VER_FILEDESCRIPTION_STR "Q2RTX Client " LONG_VERSION_STRING #define VER_ORIGINALFILENAME_STR "q2rtx.exe" -#else -#define VER_FILEDESCRIPTION_STR "Q2RTX client" -#define VER_ORIGINALFILENAME_STR "q2rtx.exe" -#endif LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US @@ -40,7 +35,7 @@ BEGIN VALUE "OriginalFilename", VER_ORIGINALFILENAME_STR VALUE "FileVersion", VERSION_STRING VALUE "InternalName", "q2rtx" - VALUE "LegalCopyright", "Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved." + VALUE "LegalCopyright", "Copyright (C) 2019-2020, NVIDIA CORPORATION. All rights reserved." VALUE "ProductName", "Quake II RTX" VALUE "ProductVersion", VERSION_STRING END diff --git a/src/windows/res/q2rtxded.rc b/src/windows/res/q2rtxded.rc index d7d1db5f7..caba24c1f 100644 --- a/src/windows/res/q2rtxded.rc +++ b/src/windows/res/q2rtxded.rc @@ -12,13 +12,8 @@ #define VER_FILEFLAGS 0x0L #endif -#ifdef _WIN64 -#define VER_FILEDESCRIPTION_STR "Q2RTX dedicated server (64-bit)" -#define VER_ORIGINALFILENAME_STR "q2proded64.exe" -#else -#define VER_FILEDESCRIPTION_STR "Q2RTX dedicated server" -#define VER_ORIGINALFILENAME_STR "q2proded.exe" -#endif +#define VER_FILEDESCRIPTION_STR "Q2RTX Dedicated Server " LONG_VERSION_STRING +#define VER_ORIGINALFILENAME_STR "q2rtxded.exe" LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US @@ -40,7 +35,7 @@ BEGIN VALUE "OriginalFilename", VER_ORIGINALFILENAME_STR VALUE "FileVersion", VERSION_STRING VALUE "InternalName", "q2rtxded" - VALUE "LegalCopyright", "Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved." + VALUE "LegalCopyright", "Copyright (C) 2019-2020, NVIDIA CORPORATION. 
All rights reserved." VALUE "ProductName", "Quake II RTX" VALUE "ProductVersion", VERSION_STRING END From 98677b66f109f8be81a4538af2c8cfee62f3288d Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Tue, 8 Dec 2020 12:51:26 -0800 Subject: [PATCH 43/48] Set version to 1.4.0. --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c04e5c404..151d29501 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,8 +4,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") project(quake2-RTX) set(Q2RTX_VERSION_MAJOR 1) -set(Q2RTX_VERSION_MINOR 3) -set(Q2RTX_VERSION_POINT 99) +set(Q2RTX_VERSION_MINOR 4) +set(Q2RTX_VERSION_POINT 0) # get short-hash execute_process( From bd91eafb73453d04d6997a4aca8f926cdf20f06a Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 10 Dec 2020 08:44:47 -0800 Subject: [PATCH 44/48] Use cached memory for image readbacks - this makes taking screenshots much faster. --- src/refresh/vkpt/textures.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/refresh/vkpt/textures.c b/src/refresh/vkpt/textures.c index f866029bd..b6433efa9 100644 --- a/src/refresh/vkpt/textures.c +++ b/src/refresh/vkpt/textures.c @@ -1416,7 +1416,7 @@ create_readback_image(VkImage *image, VkDeviceMemory *memory, VkDeviceSize *memo .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, .allocationSize = mem_req.size, .memoryTypeIndex = get_memory_type(mem_req.memoryTypeBits, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT), + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT), }; _VK(vkAllocateMemory(qvk.device, &mem_alloc_info, NULL, memory)); From a72cc07b1e70d937e1a1250f909d03a571b29fb0 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 10 Dec 2020 08:48:19 -0800 Subject: [PATCH 45/48] Added cvar `gl_screenshot_message` to control whether a message is printed when you take a screenshot, and made all screenshot 
cvars archived. --- src/refresh/images.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/refresh/images.c b/src/refresh/images.c index 4ccac7dfb..1e66a9fa4 100644 --- a/src/refresh/images.c +++ b/src/refresh/images.c @@ -410,7 +410,8 @@ SCREEN SHOTS static cvar_t *r_screenshot_format; static cvar_t *r_screenshot_quality; -static cvar_t *r_screenshot_compression; +static cvar_t* r_screenshot_compression; +static cvar_t* r_screenshot_message; static qhandle_t create_screenshot(char *buffer, size_t size, const char *name, const char *ext) @@ -466,7 +467,7 @@ static void make_screenshot(const char *name, const char *ext, if (ret < 0) { Com_EPrintf("Couldn't write %s: %s\n", buffer, Q_ErrorString(ret)); - } else { + } else if(r_screenshot_message->integer) { Com_Printf("Wrote %s\n", buffer); } } @@ -1430,10 +1431,11 @@ void IMG_Init(void) r_texture_formats->changed = r_texture_formats_changed; r_texture_formats_changed(r_texture_formats); - r_screenshot_format = Cvar_Get("gl_screenshot_format", "jpg", 0); - r_screenshot_format = Cvar_Get("gl_screenshot_format", "png", 0); - r_screenshot_quality = Cvar_Get("gl_screenshot_quality", "100", 0); - r_screenshot_compression = Cvar_Get("gl_screenshot_compression", "6", 0); + r_screenshot_format = Cvar_Get("gl_screenshot_format", "jpg", CVAR_ARCHIVE); + r_screenshot_format = Cvar_Get("gl_screenshot_format", "png", CVAR_ARCHIVE); + r_screenshot_quality = Cvar_Get("gl_screenshot_quality", "100", CVAR_ARCHIVE); + r_screenshot_compression = Cvar_Get("gl_screenshot_compression", "6", CVAR_ARCHIVE); + r_screenshot_message = Cvar_Get("gl_screenshot_message", "0", CVAR_ARCHIVE); Cmd_Register(img_cmd); From 243ccbb3bbf6a6e4a73b7ccf137e33aaf45eaae5 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 10 Dec 2020 13:46:30 -0800 Subject: [PATCH 46/48] Fixed the Lanczos final blit filter that was broken since commit 5406a1bf6c608154055a7a429aacacfe72eff3e1. 
--- src/refresh/vkpt/textures.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/refresh/vkpt/textures.c b/src/refresh/vkpt/textures.c index b6433efa9..9a5792478 100644 --- a/src/refresh/vkpt/textures.c +++ b/src/refresh/vkpt/textures.c @@ -1639,6 +1639,7 @@ LIST_IMAGES_A_B } img_info[VKPT_IMG_ASVGF_TAA_A].sampler = qvk.tex_sampler; img_info[VKPT_IMG_ASVGF_TAA_B].sampler = qvk.tex_sampler; + img_info[VKPT_IMG_TAA_OUTPUT].sampler = qvk.tex_sampler; VkWriteDescriptorSet output_img_write[NUM_IMAGES * 2]; From d65b787a872777b5ba8be860359eefb05d775a19 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 10 Dec 2020 14:05:55 -0800 Subject: [PATCH 47/48] Added change log for 1.4.0. --- changelog.md | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/changelog.md b/changelog.md index be1eb55be..86e89a214 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,49 @@ # Quake II RTX Change Log +## 1.4.0 + +**New Features:** + + * Added support for the final Vulkan Ray Tracing API. The game can now run on any GPU supporting the `VK_KHR_ray_tracing_pipeline` extension. + * Added temporal upscaling, or TAAU, for improved image quality at lower resolution scales. + +**Fixed Issues:** + + * Fixed a crash that happened when there are no available sound devices. + * Fixed a few issues with the tone mapper and the profiler for AMD GPU compatibility. + * Fixed a server crash: https://github.com/NVIDIA/Q2RTX/issues/86 + * Fixed black materials and some light leaks: https://github.com/NVIDIA/Q2RTX/issues/55 + * Fixed building the game with GCC10 on Linux: https://github.com/NVIDIA/Q2RTX/issues/80 + * Fixed missing railgun lights in photo mode: https://github.com/NVIDIA/Q2RTX/issues/75 + * Fixed missing sun light on geometry with invalid clusters. + * Fixed the CFLAGS for MinSizeRel and RelWithDebInfo builds to generate correct debug symbols. 
+ * Fixed the game stuttering on Linux: https://github.com/NVIDIA/Q2RTX/issues/62 + * Fixed the issue with all models being missing or corrupted on some maps during network play. + * Fixed the nearest filter when DRS was enabled and then disabled. + +**Denoiser Improvements:** + + * Implemented a new gradient estimation algorithm that makes the image more stable in reflections and refractions. + * Implemented sampling across checkerboard fields in the temporal filter to reduce blurring. + * Improved motion vectors for multiple refraction, in particular when thick glass is enabled. + * Improved the temporal filter to avoid smearing on surfaces that appear at small glancing angles, e.g. on the floor when going up the stairs. + * Improved the temporal filter to make lighting more stable on high-detail surfaces. + + +**Misc Improvements:** + + * Added git branch name to the game version info. + * Improved in-game screenshot feature performance. + * Improved the console log to get more information in case of game crashes. + * Increased precision of printed FPS when running timedemos. + * Made the `wrote <file>` message that was issued when taking screenshots optional, controlled by the `gl_screenshot_message` cvar. + * Reduced the amount of stutter that happens when new geometry is loaded, like on weapon pickup. + * Replaced the Vulkan headers stored in the repository with a submodule pointing to https://github.com/KhronosGroup/Vulkan-Headers + * Static resolution scale can now be set to as low as 25%. + * Updated SDL2 version to changeset 13784. + * Vulkan validation layer can now be enabled through the `vk_validation` cvar. + + ## 1.3.0 **New Features:** From ed88cc71fc5699914cca454dc82908ee8eb75359 Mon Sep 17 00:00:00 2001 From: Alexey Panteleev Date: Thu, 10 Dec 2020 14:15:10 -0800 Subject: [PATCH 48/48] Added the game library pdb into the package.
--- setup/setup.nsi | 1 + 1 file changed, 1 insertion(+) diff --git a/setup/setup.nsi b/setup/setup.nsi index a5c301a4a..1a7600d22 100644 --- a/setup/setup.nsi +++ b/setup/setup.nsi @@ -69,6 +69,7 @@ Section "Engine Files (Required)" Section_Game SetOutPath "$INSTDIR\baseq2" File "${SOURCE_DIR}\baseq2\gamex86_64.dll" + File "${SOURCE_DIR}\baseq2\gamex86_64.pdb" SetCompress OFF File "${SOURCE_DIR}\baseq2\shaders.pkz"