diff --git a/CHANGELOG.md b/CHANGELOG.md index 97afe8d15b..9d88667d92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -92,6 +92,7 @@ By @wumpf in [#6069](https://github.com/gfx-rs/wgpu/pull/6069), [#6099](https:// #### DX12 - Replace `winapi` code to use the `windows` crate. By @MarijnS95 in [#5956](https://github.com/gfx-rs/wgpu/pull/5956) +- Get `num_workgroups` builtin working for indirect dispatches. By @teoxoy in [#5730](https://github.com/gfx-rs/wgpu/pull/5730) ## 22.0.0 (2024-07-17) diff --git a/tests/tests/dispatch_workgroups_indirect.rs b/tests/tests/dispatch_workgroups_indirect.rs index 1752c84de3..6810118aba 100644 --- a/tests/tests/dispatch_workgroups_indirect.rs +++ b/tests/tests/dispatch_workgroups_indirect.rs @@ -1,4 +1,4 @@ -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext}; +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; /// Make sure that the num_workgroups builtin works properly (it requires a workaround on D3D12). #[gpu_test] @@ -12,8 +12,7 @@ static NUM_WORKGROUPS_BUILTIN: GpuTestConfiguration = GpuTestConfiguration::new( .limits(wgpu::Limits { max_push_constant_size: 4, ..wgpu::Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + }), ) .run_async(|ctx| async move { let num_workgroups = [1, 2, 3]; @@ -34,8 +33,7 @@ static DISCARD_DISPATCH: GpuTestConfiguration = GpuTestConfiguration::new() max_compute_workgroups_per_dimension: 10, max_push_constant_size: 4, ..wgpu::Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + }), ) .run_async(|ctx| async move { let max = ctx.device.limits().max_compute_workgroups_per_dimension; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index f252b5c2ae..a98a81d076 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2637,7 +2637,8 @@ impl Device { let hal_desc = hal::PipelineLayoutDescriptor { label: desc.label.to_hal(self.instance_flags), - flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE, + flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE + | hal::PipelineLayoutFlags::NUM_WORK_GROUPS, bind_group_layouts: &raw_bind_group_layouts, push_constant_ranges: desc.push_constant_ranges.as_ref(), }; diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs index 74bd20107e..51116b58c3 100644 --- a/wgpu-core/src/indirect_validation.rs +++ b/wgpu-core/src/indirect_validation.rs @@ -1,4 +1,4 @@ -use std::sync::atomic::AtomicBool; +use std::{num::NonZeroU64, sync::atomic::AtomicBool}; use thiserror::Error; @@ -61,7 +61,7 @@ impl IndirectValidation { let src = format!(" @group(0) @binding(0) - var dst: array; + var dst: array; @group(1) @binding(0) var src: array; struct OffsetPc {{ @@ -76,6 +76,9 @@ impl IndirectValidation { dst[0] = res.x; dst[1] = res.y; dst[2] = res.z; + dst[3] = res.x; + dst[4] = res.y; + dst[5] = res.z; }} "); @@ -121,6 +124,8 @@ impl IndirectValidation { } })?; + const DST_BUFFER_SIZE: NonZeroU64 = unsafe { NonZeroU64::new_unchecked(4 * 3 * 2) }; + let dst_bind_group_layout_desc = hal::BindGroupLayoutDescriptor { label: None, flags: hal::BindGroupLayoutFlags::empty(), @@ -130,7 +135,7 @@ impl IndirectValidation { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some(DST_BUFFER_SIZE), }, count: None, }], @@ -150,7 +155,7 @@ impl IndirectValidation { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: true, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some(NonZeroU64::new(4 * 3).unwrap()), }, count: None, }], @@ -208,7 +213,7 @@ impl IndirectValidation { let dst_buffer_desc = hal::BufferDescriptor { label: None, - size: 4 * 3, + size: DST_BUFFER_SIZE.get(), usage: hal::BufferUses::INDIRECT | hal::BufferUses::STORAGE_READ_WRITE, memory_flags: hal::MemoryFlags::empty(), }; @@ -228,7 +233,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer: dst_buffer_0.as_ref(), offset: 0, - size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + size: Some(DST_BUFFER_SIZE), }], samplers: &[], textures: &[], @@ -251,7 +256,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer: dst_buffer_1.as_ref(), offset: 0, - size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + size: Some(DST_BUFFER_SIZE), }], samplers: &[], textures: &[], @@ -296,7 +301,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer, offset: 0, - size: Some(std::num::NonZeroU64::new(binding_size).unwrap()), + size: Some(NonZeroU64::new(binding_size).unwrap()), }], samplers: &[], textures: &[], diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 5f32480fdb..9e0be30325 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1198,11 +1198,17 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { - self.prepare_dispatch([0; 3]); - //TODO: update special constants indirectly + self.update_root_elements(); + let cmd_signature = if let Some(cmd_signatures) = + self.pass.layout.special_constants_cmd_signatures.as_ref() + { + &cmd_signatures.dispatch + } else { + &self.shared.cmd_signatures.dispatch + }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - &self.shared.cmd_signatures.dispatch, + cmd_signature, 1, &buffer.resource, offset, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index dd68160315..d78534be3f 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -93,34 +93,12 @@ impl super::Device { let capacity_views = limits.max_non_sampler_bindings as u64; let capacity_samplers = 2_048; - fn create_command_signature( - raw: &Direct3D12::ID3D12Device, - byte_stride: usize, - arguments: &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC], - node_mask: u32, - ) -> Result { - let mut signature = None; - unsafe { - raw.CreateCommandSignature( - &Direct3D12::D3D12_COMMAND_SIGNATURE_DESC { - ByteStride: byte_stride as u32, - NumArgumentDescs: arguments.len() as u32, - pArgumentDescs: arguments.as_ptr(), - NodeMask: node_mask, - }, - None, - &mut signature, - ) - } - .into_device_result("Command signature creation")?; - signature.ok_or(crate::DeviceError::ResourceCreationFailed) - } - let shared = super::DeviceShared { zero_buffer, cmd_signatures: super::CommandSignatures { - draw: create_command_signature( + draw: Self::create_command_signature( &raw, + None, mem::size_of::(), &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, @@ -128,8 +106,9 @@ impl super::Device { }], 0, )?, - draw_indexed: create_command_signature( + draw_indexed: Self::create_command_signature( &raw, + None, mem::size_of::(), &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, @@ -137,8 +116,9 @@ impl super::Device { }], 0, )?, - dispatch: create_command_signature( + dispatch: Self::create_command_signature( &raw, + None, mem::size_of::(), &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, @@ -213,6 +193,30 @@ impl super::Device { }) } + fn create_command_signature( + raw: &Direct3D12::ID3D12Device, + root_signature: Option<&ID3D12RootSignature>, + byte_stride: usize, + arguments: &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC], + node_mask: u32, + ) -> Result { + let mut signature = None; + unsafe { + raw.CreateCommandSignature( + &Direct3D12::D3D12_COMMAND_SIGNATURE_DESC { + ByteStride: byte_stride as u32, + NumArgumentDescs: arguments.len() as u32, + pArgumentDescs: arguments.as_ptr(), + NodeMask: node_mask, + }, + root_signature, + &mut signature, + ) + } + .into_device_result("Command signature creation")?; + signature.ok_or(crate::DeviceError::ResourceCreationFailed) + } + // Blocks until the dedicated present queue is finished with all of its work. // // Once this method completes, the surface is able to be resized or deleted. @@ -1112,6 +1116,63 @@ impl crate::Device for super::Device { } .into_device_result("Root signature creation")?; + let special_constants_cmd_signatures = + if let Some(root_index) = special_constants_root_index { + let constant_indirect_argument_desc = Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + Anonymous: Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC_0 { + Constant: Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC_0_1 { + RootParameterIndex: root_index, + DestOffsetIn32BitValues: 0, + Num32BitValuesToSet: 3, + }, + }, + }; + Some(super::CommandSignatures { + draw: Self::create_command_signature( + &self.raw, + Some(&raw), + 12 + mem::size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, + ..Default::default() + }, + ], + 0, + )?, + draw_indexed: Self::create_command_signature( + &self.raw, + Some(&raw), + 12 + mem::size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, + ..Default::default() + }, + ], + 0, + )?, + dispatch: Self::create_command_signature( + &self.raw, + Some(&raw), + 12 + mem::size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, + ..Default::default() + }, + ], + 0, + )?, + }) + } else { + None + }; + if let Some(label) = desc.label { unsafe { raw.SetName(&windows::core::HSTRING::from(label)) } .into_device_result("SetName")?; @@ -1124,6 +1185,7 @@ impl crate::Device for super::Device { signature: Some(raw), total_root_elements: parameters.len() as super::RootIndex, special_constants_root_index, + special_constants_cmd_signatures, root_constant_info, }, bind_group_infos, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index e4b9e74637..060d6392f2 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -516,6 +516,7 @@ struct Idler { event: Event, } +#[derive(Debug, Clone)] struct CommandSignatures { draw: Direct3D12::ID3D12CommandSignature, draw_indexed: Direct3D12::ID3D12CommandSignature, @@ -634,6 +635,7 @@ impl PassState { signature: None, total_root_elements: 0, special_constants_root_index: None, + special_constants_cmd_signatures: None, root_constant_info: None, }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], @@ -871,6 +873,7 @@ struct PipelineLayoutShared { signature: Option, total_root_elements: RootIndex, special_constants_root_index: Option, + special_constants_cmd_signatures: Option, root_constant_info: Option, }