Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use shared memory (workgroup) to speed up compute shader? #14

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
730 changes: 424 additions & 306 deletions Cargo.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,21 @@ edition = "2021"

[dependencies]
anyhow = "1.0.4"
bevy = { version = "0.10.0", features = ["serialize"] }
bevy = { version = "0.10.1", features = ["serialize"] }
bevy_rapier3d = { version = "0.21.0", features = ["enhanced-determinism", "debug-render"] }
bytemuck = "1.7"
ron = "0.8.0"
toml = "0.5.9"
toml = "0.7.3"
flagset = "0.4.3"
serde = "1.0.149"
smartstring = { version = "1.0.1", features = ["serde"] }
wgpu = "0.15.0"

[profile.dev]
opt-level = 1
opt-level = 0

[profile.dev.package."*"]
opt-level = 3
opt-level = 0

[profile.release]
opt-level = 3
Expand Down
6 changes: 3 additions & 3 deletions assets/shaders/simplex.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ fn simplexNoise2(v: vec2<f32>) -> f32 {
return 130. * dot(m, g);
}

@compute @workgroup_size(32, 32, 1)
fn main(@builtin(global_invocation_id) invocation_id: vec3<u32>) {
let index = invocation_id.x + invocation_id.y * 32u;
@workgroup_size(32, 32, 1)
@compute fn main(@builtin(global_invocation_id) invocation_id: vec3<u32>) {
let index = invocation_id.x + invocation_id.y * 16u;
let in_point = in_points.data[index];
out_heights.data[index] = simplexNoise2(in_point);
}
381 changes: 176 additions & 205 deletions assets/shaders/voxels.wgsl

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ use bevy::{
mesh::{Indices, VertexAttributeValues},
render_resource::*,
},
render::view::NoFrustumCulling,
};
use bevy_rapier3d::prelude::*;

Expand Down Expand Up @@ -52,7 +53,7 @@ fn main() {
..default()
})
.add_plugin(RapierPhysicsPlugin::<NoUserData>::default())
.add_plugin(RapierDebugRenderPlugin::default())
// .add_plugin(RapierDebugRenderPlugin::default())
.add_plugin(VoxelsPlugin)
.add_plugin(FrameTimeDiagnosticsPlugin::default())
.add_plugin(InventoryPlugin)
Expand Down Expand Up @@ -213,6 +214,7 @@ fn spawn_voxel_sys(
commands.spawn(Map::default());
commands.spawn((
Chunk::new(IVec3::ZERO),
NoFrustumCulling,
PbrBundle {
mesh: mesh_handle.clone(),
material: ground_mat_handle.clone(),
Expand Down Expand Up @@ -254,8 +256,8 @@ fn spawn_player_sys(mut commands: Commands) {
pub struct Buffers {
// Lookup tables are uploaded as GPU buffers (created with `BufferUsages::STORAGE`)
// because they are too large to have inline in the shader
edge_table: Buffer,
tri_table: Buffer,
triangle_table: Buffer,
block_face_table: Buffer,
points: BufVec<Vec2>,
heights: BufVec<f32>,
voxels: Buffer,
Expand Down
4 changes: 2 additions & 2 deletions src/qgame/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use bevy::{
prelude::*,
reflect::TypeUuid,
utils::BoxedFuture,
window::CursorGrabMode,
};
use bevy::window::CursorGrabMode;
use flagset::{flags, FlagSet};
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -154,7 +154,7 @@ impl AssetLoader for ConfigAssetLoader {
load_context: &'a mut LoadContext,
) -> BoxedFuture<'a, Result<(), anyhow::Error>> {
Box::pin(async move {
let asset: Config = toml::from_slice(bytes)?;
let asset: Config = toml::from_str(std::str::from_utf8(bytes)?)?;
load_context.set_default_asset(LoadedAsset::new(asset));
Ok(())
})
Expand Down
2 changes: 1 addition & 1 deletion src/qgame/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ impl AssetLoader for ConfigAssetLoader {
load_context: &'a mut LoadContext,
) -> BoxedFuture<'a, Result<(), anyhow::Error>> {
Box::pin(async move {
let asset: GunProps = toml::from_slice(bytes)?;
let asset: GunProps = toml::from_str(std::str::from_utf8(bytes)?)?;
load_context.set_default_asset(LoadedAsset::new(asset));
Ok(())
})
Expand Down
46 changes: 10 additions & 36 deletions src/qgame/lookup.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,4 @@
/// Edge lookup table: 256 `u32` bitmasks, laid out eight per row.
///
/// Properties that hold for the data below:
/// - every entry fits in 12 bits (the largest value is `0xfff`);
/// - the table is mirror-symmetric: `EDGE_TABLE[i] == EDGE_TABLE[255 - i]`.
///
/// NOTE(review): this looks like the standard marching-cubes edge table
/// (8-bit cube-corner configuration -> mask of intersected cube edges);
/// confirm against the shader that consumes it.
///
/// The reference is implicitly `'static` in a `const` item, so the lifetime
/// is not spelled out.
pub(crate) const EDGE_TABLE: &[u32; 256] = &[
    0x000, 0x109, 0x203, 0x30a, 0x406, 0x50f, 0x605, 0x70c,
    0x80c, 0x905, 0xa0f, 0xb06, 0xc0a, 0xd03, 0xe09, 0xf00,
    0x190, 0x099, 0x393, 0x29a, 0x596, 0x49f, 0x795, 0x69c,
    0x99c, 0x895, 0xb9f, 0xa96, 0xd9a, 0xc93, 0xf99, 0xe90,
    0x230, 0x339, 0x033, 0x13a, 0x636, 0x73f, 0x435, 0x53c,
    0xa3c, 0xb35, 0x83f, 0x936, 0xe3a, 0xf33, 0xc39, 0xd30,
    0x3a0, 0x2a9, 0x1a3, 0x0aa, 0x7a6, 0x6af, 0x5a5, 0x4ac,
    0xbac, 0xaa5, 0x9af, 0x8a6, 0xfaa, 0xea3, 0xda9, 0xca0,
    0x460, 0x569, 0x663, 0x76a, 0x066, 0x16f, 0x265, 0x36c,
    0xc6c, 0xd65, 0xe6f, 0xf66, 0x86a, 0x963, 0xa69, 0xb60,
    0x5f0, 0x4f9, 0x7f3, 0x6fa, 0x1f6, 0x0ff, 0x3f5, 0x2fc,
    0xdfc, 0xcf5, 0xfff, 0xef6, 0x9fa, 0x8f3, 0xbf9, 0xaf0,
    0x650, 0x759, 0x453, 0x55a, 0x256, 0x35f, 0x055, 0x15c,
    0xe5c, 0xf55, 0xc5f, 0xd56, 0xa5a, 0xb53, 0x859, 0x950,
    0x7c0, 0x6c9, 0x5c3, 0x4ca, 0x3c6, 0x2cf, 0x1c5, 0x0cc,
    0xfcc, 0xec5, 0xdcf, 0xcc6, 0xbca, 0xac3, 0x9c9, 0x8c0,
    0x8c0, 0x9c9, 0xac3, 0xbca, 0xcc6, 0xdcf, 0xec5, 0xfcc,
    0x0cc, 0x1c5, 0x2cf, 0x3c6, 0x4ca, 0x5c3, 0x6c9, 0x7c0,
    0x950, 0x859, 0xb53, 0xa5a, 0xd56, 0xc5f, 0xf55, 0xe5c,
    0x15c, 0x055, 0x35f, 0x256, 0x55a, 0x453, 0x759, 0x650,
    0xaf0, 0xbf9, 0x8f3, 0x9fa, 0xef6, 0xfff, 0xcf5, 0xdfc,
    0x2fc, 0x3f5, 0x0ff, 0x1f6, 0x6fa, 0x7f3, 0x4f9, 0x5f0,
    0xb60, 0xa69, 0x963, 0x86a, 0xf66, 0xe6f, 0xd65, 0xc6c,
    0x36c, 0x265, 0x16f, 0x066, 0x76a, 0x663, 0x569, 0x460,
    0xca0, 0xda9, 0xea3, 0xfaa, 0x8a6, 0x9af, 0xaa5, 0xbac,
    0x4ac, 0x5a5, 0x6af, 0x7a6, 0x0aa, 0x1a3, 0x2a9, 0x3a0,
    0xd30, 0xc39, 0xf33, 0xe3a, 0x936, 0x83f, 0xb35, 0xa3c,
    0x53c, 0x435, 0x73f, 0x636, 0x13a, 0x033, 0x339, 0x230,
    0xe90, 0xf99, 0xc93, 0xd9a, 0xa96, 0xb9f, 0x895, 0x99c,
    0x69c, 0x795, 0x49f, 0x596, 0x29a, 0x393, 0x099, 0x190,
    0xf00, 0xe09, 0xd03, 0xc0a, 0xb06, 0xa0f, 0x905, 0x80c,
    0x70c, 0x605, 0x50f, 0x406, 0x30a, 0x203, 0x109, 0x000,
];

pub(crate) const TRI_TABLE: &'static [[i32; 16]; 256] = &[
pub(crate) const TRIANGLE_TABLE: &'static [[i32; 16]; 256] = &[
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
[0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
[0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
Expand Down Expand Up @@ -291,3 +256,12 @@ pub(crate) const TRI_TABLE: &'static [[i32; 16]; 256] = &[
[0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1],
[-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
];

/// Corner positions for the six faces of an axis-aligned unit cube centred on
/// the origin (every coordinate is `+0.5` or `-0.5`).
///
/// Indexing is `BLOCK_FACE_TABLE[face][corner][component]`:
/// - `face` runs over `+X, -X, +Y, -Y, +Z, -Z` in that order, so
///   `face / 2` is the axis held constant and an even `face` is the
///   positive side;
/// - `corner` selects one of the four distinct corners of that face;
/// - `component` is `x, y, z`.
///
/// The reference is implicitly `'static` in a `const` item, so the lifetime
/// is not spelled out.
pub(crate) const BLOCK_FACE_TABLE: &[[[f32; 3]; 4]; 6] = &[
    // +X
    [[0.5, -0.5, -0.5], [0.5, 0.5, -0.5], [0.5, 0.5, 0.5], [0.5, -0.5, 0.5]],
    // -X
    [[-0.5, -0.5, 0.5], [-0.5, 0.5, 0.5], [-0.5, 0.5, -0.5], [-0.5, -0.5, -0.5]],
    // +Y
    [[-0.5, 0.5, 0.5], [-0.5, 0.5, -0.5], [0.5, 0.5, -0.5], [0.5, 0.5, 0.5]],
    // -Y
    [[-0.5, -0.5, 0.5], [0.5, -0.5, 0.5], [0.5, -0.5, -0.5], [-0.5, -0.5, -0.5]],
    // +Z
    [[-0.5, -0.5, 0.5], [-0.5, 0.5, 0.5], [0.5, 0.5, 0.5], [0.5, -0.5, 0.5]],
    // -Z
    [[-0.5, 0.5, -0.5], [-0.5, -0.5, -0.5], [0.5, -0.5, -0.5], [0.5, 0.5, -0.5]],
];
36 changes: 19 additions & 17 deletions src/qgame/voxel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use crate::*;
const CHUNK_SZ: usize = 32;
const CHUNK_SZ_2: usize = CHUNK_SZ * CHUNK_SZ;
const CHUNK_SZ_3: usize = CHUNK_SZ * CHUNK_SZ * CHUNK_SZ;
const COMPUTE_TILE_SZ: usize = 4;

#[derive(Component)]
pub struct Chunk {
Expand Down Expand Up @@ -85,14 +86,14 @@ impl FromWorld for VoxelsPipeline {
let render_device = world.get_resource::<RenderDevice>().unwrap();
let _asset_server = world.get_resource::<AssetServer>().unwrap();

let edge_table = render_device.create_buffer_with_data(&BufferInitDescriptor {
label: Some("edge table buffer"),
contents: cast_slice(EDGE_TABLE),
let triangle_table = render_device.create_buffer_with_data(&BufferInitDescriptor {
label: Some("tri table buffer"),
contents: cast_slice(TRIANGLE_TABLE),
usage: BufferUsages::STORAGE,
});
let tri_table = render_device.create_buffer_with_data(&BufferInitDescriptor {
label: Some("tri table buffer"),
contents: cast_slice(TRI_TABLE),
let block_face_table = render_device.create_buffer_with_data(&BufferInitDescriptor {
label: Some("block face table buffer"),
contents: cast_slice(BLOCK_FACE_TABLE),
usage: BufferUsages::STORAGE,
});
let points: BufVec<Vec2> = BufVec::with_capacity(false, CHUNK_SZ_2, render_device);
Expand All @@ -109,10 +110,10 @@ impl FromWorld for VoxelsPipeline {
usage: BufferUsages::COPY_DST | BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
let vertices: BufVec<Vec4> = BufVec::with_capacity(true, CHUNK_SZ_3 * 4 * 6, render_device);
let uvs: BufVec<Vec2> = BufVec::with_capacity(true, CHUNK_SZ_3 * 4 * 6, render_device);
let normals: BufVec<Vec4> = BufVec::with_capacity(true, CHUNK_SZ_3 * 4 * 6, render_device);
let indices: BufVec<u32> = BufVec::with_capacity(true, CHUNK_SZ_3 * 6 * 6, render_device);
let vertices: BufVec<Vec4> = BufVec::with_capacity(true, CHUNK_SZ_3 * 12, render_device);
let uvs: BufVec<Vec2> = BufVec::with_capacity(true, CHUNK_SZ_3 * 12, render_device);
let normals: BufVec<Vec4> = BufVec::with_capacity(true, CHUNK_SZ_3 * 12, render_device);
let indices: BufVec<u32> = BufVec::with_capacity(true, CHUNK_SZ_3 * 12, render_device);
let atomics: BufVec<u32> = BufVec::with_capacity(true, 2, render_device);
let atomics_staging = render_device.create_buffer_with_data(&BufferInitDescriptor {
label: Some("atomics staging buffer"),
Expand Down Expand Up @@ -147,7 +148,7 @@ impl FromWorld for VoxelsPipeline {
entry_point: "main",
});

world.insert_resource(Buffers { edge_table, tri_table, points, heights, voxels, voxels_staging, vertices, normals, uvs, indices, atomics, atomics_staging });
world.insert_resource(Buffers { triangle_table, block_face_table, points, heights, voxels, voxels_staging, vertices, normals, uvs, indices, atomics, atomics_staging });

VoxelsPipeline {
simplex_pipeline,
Expand Down Expand Up @@ -205,8 +206,8 @@ pub fn voxel_polygonize_system(
label: Some("voxels binding"),
layout: &pipeline.voxels_pipeline.get_bind_group_layout(0),
entries: &[
BindGroupEntry { binding: 0, resource: buffers.edge_table.as_entire_binding() },
BindGroupEntry { binding: 1, resource: buffers.tri_table.as_entire_binding() },
BindGroupEntry { binding: 0, resource: buffers.triangle_table.as_entire_binding() },
BindGroupEntry { binding: 1, resource: buffers.block_face_table.as_entire_binding() },
BindGroupEntry { binding: 2, resource: buffers.voxels.as_entire_binding() },
BindGroupEntry { binding: 3, resource: buffers.atomics.buffer().as_entire_binding() },
BindGroupEntry { binding: 4, resource: buffers.vertices.buffer().as_entire_binding() },
Expand All @@ -224,7 +225,7 @@ pub fn voxel_polygonize_system(
let mut pass = command_encoder.begin_compute_pass(&ComputePassDescriptor::default());
pass.set_pipeline(&pipeline.simplex_pipeline);
pass.set_bind_group(0, &binding_groups.simplex, &[]);
pass.dispatch_workgroups((CHUNK_SZ / 32) as u32, (CHUNK_SZ / 32) as u32, 1);
pass.dispatch_workgroups(1, 1, 1);
}
buffers.heights.encode_read(CHUNK_SZ_2, &mut command_encoder);
render_queue.submit(once(command_encoder.finish()));
Expand All @@ -236,7 +237,8 @@ pub fn voxel_polygonize_system(
for y in 0..CHUNK_SZ {
for x in 0..CHUNK_SZ {
let noise01 = (buffers.heights.as_slice()[x + z * CHUNK_SZ] + 1.0) * 0.5;
let height = noise01 * 4.0 + 8.0 - (y as f32);
// let height = noise01 * 4.0 + 8.0 - (y as f32);
let height = noise01 * 1.0 + 2.0 - (y as f32);
let mut density = 0.0;

if height > 1.0 {
Expand Down Expand Up @@ -265,7 +267,7 @@ pub fn voxel_polygonize_system(
let mut pass = command_encoder.begin_compute_pass(&ComputePassDescriptor::default());
pass.set_pipeline(&pipeline.voxels_pipeline);
pass.set_bind_group(0, &binding_groups.voxels, &[]);
let dispatch_size = (CHUNK_SZ / 8) as u32;
let dispatch_size = (CHUNK_SZ / COMPUTE_TILE_SZ) as u32;
pass.dispatch_workgroups(dispatch_size, dispatch_size, dispatch_size);
}
buffers.atomics.encode_read(2, &mut command_encoder);
Expand Down Expand Up @@ -326,7 +328,7 @@ pub fn voxel_polygonize_system(
}

// TODO:perf inefficient
commands.entity(entity).insert(Collider::from_bevy_mesh(mesh, &ComputedColliderShape::TriMesh).unwrap());
// commands.entity(entity).insert(Collider::from_bevy_mesh(mesh, &ComputedColliderShape::TriMesh).unwrap());
}

// println!("Elapsed: {:.2?}", now.elapsed());
Expand Down