-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Property pool hlsl #94
Draft
deprilula28
wants to merge
14
commits into
master
Choose a base branch
from
property_pool_hlsl
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
dddf575
Work on property pools example
deprilula28 43d95c8
Add creation of property pool handler to example
deprilula28 66e93fb
Work on doing transferProperties on example
deprilula28 56f855d
Work on property pool example
deprilula28 ed594ab
Merge branch 'vulkan_1_3' into property_pool_hlsl
deprilula28 3adca44
Fix vulkan_1_3 incompatibilities
deprilula28 e8e512f
Update property pool example for vulkan_1_3
deprilula28 f834030
WIP testing
deprilula28 9682dee
WIP suballocated descriptor set
deprilula28 48be8e8
Testing sub allocator descriptor set allocations
deprilula28 7bc9f35
Work on property pool example fixes
deprilula28 102aa47
WIP example
deprilula28 ac17825
Remove unused things from example 05
deprilula28 e7b1f9b
Remove window app stuff
deprilula28 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Load the `common` CMake module and stop configuring if it could not be included.
include(common RESULT_VARIABLE RES)
if(NOT RES)
	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

nbl_create_executable_project(
	""
	""
	""
	""
	"${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}"
)

# Optionally embed everything under `app_resources` into the executable as builtin resources.
if(NBL_EMBED_BUILTIN_RESOURCES)
	set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)
	set(RESOURCE_DIR "app_resources")

	# Absolute locations of the resource search root and of the generated sources/headers
	get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
	get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)
	get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)

	# Collect every file below the resource directory, as paths relative to it
	file(GLOB_RECURSE BUILTIN_RESOURCE_FILES
		RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}"
		"${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*"
	)
	foreach(RES_FILE IN LISTS BUILTIN_RESOURCE_FILES)
		LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
	endforeach()

	ADD_CUSTOM_BUILTIN_RESOURCES(
		${_BR_TARGET_}
		RESOURCES_TO_EMBED
		"${_SEARCH_DIRECTORIES_}"
		"${RESOURCE_DIR}"
		"nbl::this_example::builtin"
		"${_OUTPUT_DIRECTORY_HEADER_}"
		"${_OUTPUT_DIRECTORY_SOURCE_}"
	)

	LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#include "nbl/builtin/hlsl/cpp_compat.hlsl" | ||
|
||
// Unfortunately not every piece of C++14 metaprogramming syntax is available in HLSL 202x | ||
// https://github.com/microsoft/DirectXShaderCompiler/issues/5751#issuecomment-1800847954 | ||
// Element type the compute shader loads from the input buffer (one 3-component float vector per element).
typedef nbl::hlsl::float32_t3 input_t; | ||
// Scalar result type; NOTE(review): the shader in this example stores `float32_t` directly rather than naming `output_t`.
typedef nbl::hlsl::float32_t output_t; | ||
|
||
// Equals 2^20. NOTE(review): presumably the upper bound for `dataElementCount` - not referenced by the shader shown here, confirm on the host side.
NBL_CONSTEXPR_STATIC_INLINE uint32_t MaxPossibleElementCount = 1 << 20; | ||
|
||
// Layout of the push constant block read by the compute shader.
struct PushConstantData | ||
{ | ||
// Base address the shader passes to `vk::RawBufferLoad<input_t>`, indexed in units of `sizeof(input_t)`.
uint64_t inputAddress; | ||
// Base address the shader passes to `vk::RawBufferStore<float32_t>`, indexed in units of `sizeof(float32_t)`.
uint64_t outputAddress; | ||
// Number of elements; invocations with an index at or beyond this value return without touching memory.
uint32_t dataElementCount; | ||
}; | ||
|
||
// X dimension of the compute workgroup, used in the shader's `numthreads` attribute.
NBL_CONSTEXPR uint32_t WorkgroupSize = 256; | ||
|
||
// Yes we do have our own re-creation of C++'s STL in HLSL2021 ! | ||
#include "nbl/builtin/hlsl/limits.hlsl" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
#include "common.hlsl" | ||
|
||
// just a small test | ||
#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" | ||
|
||
// Buffer addresses and element count for the current dispatch
[[vk::push_constant]] PushConstantData pushConstants;

// No-op placeholder, a later example demonstrates how the capability traits actually get used
template<typename capability_traits=nbl::hlsl::jit::device_capabilities_traits>
void dummyTraitTest() {}

// One invocation per input element: loads its own element, compares it against `OthersToTest`
// pseudo-randomly chosen elements and stores the smallest distance found.
[numthreads(WorkgroupSize,1,1)]
void main(uint32_t3 ID : SV_DispatchThreadID)
{
    dummyTraitTest();

    const uint32_t invocation = ID.x;
    const uint32_t elementCount = pushConstants.dataElementCount;
    // invocations past the end of the data have nothing to do
    if (invocation>=elementCount)
        return;

    const uint64_t inputBase = pushConstants.inputAddress;
    const input_t self = vk::RawBufferLoad<input_t>(inputBase+sizeof(input_t)*invocation);

    // seeded from the element count and the invocation index
    nbl::hlsl::Xoroshiro64StarStar rng = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(elementCount,invocation)^0xdeadbeefu);

    const static uint32_t OthersToTest = 15;
    // start from the largest finite float so the first comparison always replaces it
    float32_t closest = nbl::hlsl::numeric_limits<float32_t>::max;
    [[unroll(OthersToTest)]]
    for (uint32_t k=0; k<OthersToTest; ++k)
    {
        const uint32_t otherIx = rng() % elementCount;
        const input_t other = vk::RawBufferLoad<input_t>(inputBase+sizeof(input_t)*otherIx);
        const float32_t dist = length(other-self);
        closest = min(dist,closest);
    }

    const uint64_t outputAddr = pushConstants.outputAddress+sizeof(float32_t)*invocation;
    vk::RawBufferStore<float32_t>(outputAddr,closest);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"enableParallelBuild": true, | ||
"threadsPerBuildProcess" : 2, | ||
"isExecuted": false, | ||
"scriptPath": "", | ||
"cmake": { | ||
"configurations": [ "Release", "Debug", "RelWithDebInfo" ], | ||
"buildModes": [], | ||
"requiredOptions": [] | ||
}, | ||
"profiles": [ | ||
{ | ||
"backend": "vulkan", // should be none | ||
"platform": "windows", | ||
"buildModes": [], | ||
"runConfiguration": "Release", // we also need to run in Debug and RWDI because foundational example | ||
"gpuArchitectures": [] | ||
} | ||
], | ||
"dependencies": [], | ||
"data": [ | ||
{ | ||
"dependencies": [], | ||
"command": [""], | ||
"outputs": [] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
// Copyright (C) 2018-2023 - DevSH Graphics Programming Sp. z O.O. | ||
// This file is part of the "Nabla Engine". | ||
// For conditions of distribution and use, see copyright notice in nabla.h | ||
|
||
|
||
#include "nbl/video/surface/CSurfaceVulkan.h" | ||
#include "nbl/video/alloc/SubAllocatedDescriptorSet.h" | ||
|
||
#include "../common/BasicMultiQueueApplication.hpp" | ||
#include "../common/MonoAssetManagerAndBuiltinResourceApplication.hpp" | ||
|
||
using namespace nbl; | ||
using namespace core; | ||
using namespace system; | ||
using namespace ui; | ||
using namespace asset; | ||
using namespace video; | ||
|
||
#include "app_resources/common.hlsl" | ||
#include "nbl/builtin/hlsl/bit.hlsl" | ||
|
||
// This application exercises `CPropertyPoolHandler::transferProperties`: every work-loop iteration uploads a block of
// 16-bit values into a source buffer, records a property transfer into a destination buffer, submits it,
// waits for completion and compares the start of the destination buffer against the CPU copy.
class PropertyPoolsApp final : public examples::MonoDeviceApplication, public examples::MonoAssetManagerAndBuiltinResourceApplication
{
        using device_base_t = examples::MonoDeviceApplication;
        using asset_base_t = examples::MonoAssetManagerAndBuiltinResourceApplication;

        smart_refctd_ptr<CPropertyPoolHandler> m_propertyPoolHandler;
        // Passed to `transferProperties` as its scratch and address buffer bindings
        smart_refctd_ptr<IGPUBuffer> m_scratchBuffer;
        smart_refctd_ptr<IGPUBuffer> m_addressBuffer;
        // Source and destination of the property transfer
        smart_refctd_ptr<IGPUBuffer> m_transferSrcBuffer;
        smart_refctd_ptr<IGPUBuffer> m_transferDstBuffer;
        // CPU copy of the uploaded values, also the reference for the readback check
        std::vector<uint16_t> m_data;

        // The pool cache is just a formalized way of round-robining command pools and resetting + reusing them after their most recent submit signals finished.
        // It's a little more ergonomic to use if you don't have a 1:1 mapping between frames and pools.
        smart_refctd_ptr<nbl::video::ICommandPoolCache> m_poolCache;

        // A single timeline semaphore is signalled with the iteration number by every submit
        smart_refctd_ptr<ISemaphore> m_timeline;
        uint64_t m_iteration = 0;
        constexpr static inline uint64_t MaxIterations = 200;

        static constexpr uint64_t TransfersAmount = 1024;
        static constexpr uint64_t MaxValuesPerTransfer = 512;

        // Number of values placed in `m_data` (0,1,...,65534)
        static constexpr uint32_t DataElementCount = (uint32_t(1) << 16) - 1;
        // Largest amount of bytes recorded by a single `updateBuffer` call
        static constexpr uint32_t MaxUpdateBufferBytes = 65536;
        // How many leading values of the destination buffer get verified after each submit
        static constexpr uint32_t ReadbackCheckCount = 1024;

    public:
        // Yay thanks to multiple inheritance we cannot forward ctors anymore
        PropertyPoolsApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) :
            system::IApplicationFramework(_localInputCWD,_localOutputCWD,_sharedInputCWD,_sharedOutputCWD) {}

        // Creates the property pool handler, the four buffers, the test data, the command pool cache and the timeline semaphore.
        // Returns false when a base class fails to initialize or a buffer could not be created.
        bool onAppInitialized(smart_refctd_ptr<ISystem>&& system) override
        {
            // Remember to call the base class initialization!
            // The first base gets its own reference; passing `std::move(system)` to both calls could hand the second one a moved-from pointer.
            if (!device_base_t::onAppInitialized(smart_refctd_ptr<ISystem>(system)))
                return false;
            if (!asset_base_t::onAppInitialized(std::move(system)))
                return false;

            m_propertyPoolHandler = core::make_smart_refctd_ptr<CPropertyPoolHandler>(core::smart_refctd_ptr(m_device));

            // Creates a buffer of at least `size` bytes with the given extra usage flags, allocates memory for it and names it.
            // With `hostVisible` the memory type choice is restricted to the physical device's down-streaming memory types.
            // Returns nullptr when the buffer object could not be created.
            auto createBuffer = [&](uint64_t size, core::bitflag<asset::IBuffer::E_USAGE_FLAGS> flags, const char* name, bool hostVisible) -> smart_refctd_ptr<IGPUBuffer>
            {
                video::IGPUBuffer::SCreationParams creationParams;
                creationParams.size = ((size + 3) / 4) * 4; // round up to a multiple of 4 bytes
                creationParams.usage = flags
                    | asset::IBuffer::EUF_STORAGE_BUFFER_BIT
                    | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT
                    | asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF;

                auto buffer = m_device->createBuffer(std::move(creationParams));
                if (!buffer)
                    return nullptr;

                nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = buffer->getMemoryReqs();
                if (hostVisible)
                    reqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDownStreamingMemoryTypeBits();
                m_device->allocate(reqs, buffer.get(), nbl::video::IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT);
                buffer->setObjectDebugName(name);

                return buffer;
            };

            m_scratchBuffer = createBuffer(sizeof(nbl::hlsl::property_pools::TransferRequest) * TransfersAmount, core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT), "m_scratchBuffer", true);
            m_addressBuffer = createBuffer(sizeof(uint32_t) * TransfersAmount * MaxValuesPerTransfer, core::bitflag(asset::IBuffer::EUF_NONE), "m_addressBuffer", false);
            m_transferSrcBuffer = createBuffer(sizeof(uint16_t) * TransfersAmount * MaxValuesPerTransfer, core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT), "m_transferSrcBuffer", false);
            m_transferDstBuffer = createBuffer(sizeof(uint16_t) * TransfersAmount * MaxValuesPerTransfer, core::bitflag(asset::IBuffer::EUF_NONE), "m_transferDstBuffer", true);
            if (!m_scratchBuffer || !m_addressBuffer || !m_transferSrcBuffer || !m_transferDstBuffer)
                return false;

            // Test pattern: every value equals its own index
            m_data.reserve(DataElementCount);
            for (uint32_t i = 0; i < DataElementCount; i++)
                m_data.push_back(static_cast<uint16_t>(i));

            // Capacity handed to the command pool cache
            constexpr auto MaxConcurrency = 64;

            // The Command Pools are reset and reused instead of being thrown away, so they are not created with the transient flag
            m_poolCache = ICommandPoolCache::create(core::smart_refctd_ptr(m_device),getComputeQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::NONE,MaxConcurrency);

            // In contrast to fences, we just need one semaphore to rule all submits
            m_timeline = m_device->createSemaphore(m_iteration);

            return true;
        }

        // The work loop runs until `MaxIterations` submits have been made
        bool keepRunning() override { return m_iteration<MaxIterations; }

        // One iteration: record upload + property transfer, submit, block until done, verify the destination buffer.
        void workLoopBody() override
        {
            IQueue* const queue = getComputeQueue();

            // Obtain our command pool once one gets recycled
            uint32_t poolIx;
            do
            {
                poolIx = m_poolCache->acquirePool();
            } while (poolIx==ICommandPoolCache::invalid_index);

            smart_refctd_ptr<IGPUCommandBuffer> cmdbuf;
            {
                m_poolCache->getPool(poolIx)->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1},core::smart_refctd_ptr(m_logger));
                // the command buffer is re-recorded every iteration, hence one time submit
                cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT);

                // COMMAND RECORDING
                // The upload size is rounded up to a multiple of 4 bytes, which is more than `m_data` holds when its byte size
                // is not already a multiple of 4. Upload from a zero-padded copy so no chunk reads past the end of `m_data`.
                const uint32_t alignedBytes = static_cast<uint32_t>((((sizeof(uint16_t) * m_data.size()) + 3) / 4) * 4);
                std::vector<uint16_t> staging(m_data);
                staging.resize(alignedBytes / sizeof(uint16_t), 0);
                for (uint32_t offset = 0; offset < alignedBytes; offset += MaxUpdateBufferBytes)
                {
                    const uint32_t remaining = alignedBytes - offset;
                    const uint32_t chunkBytes = remaining < MaxUpdateBufferBytes ? remaining : MaxUpdateBufferBytes;
                    cmdbuf->updateBuffer({ offset, chunkBytes, core::smart_refctd_ptr<video::IGPUBuffer>(m_transferSrcBuffer) }, &staging[offset / sizeof(uint16_t)]);
                }

                CPropertyPoolHandler::TransferRequest transferRequest;
                transferRequest.memblock = asset::SBufferRange<video::IGPUBuffer> { 0, sizeof(uint16_t) * m_data.size(), core::smart_refctd_ptr<video::IGPUBuffer>(m_transferSrcBuffer) };
                transferRequest.elementSize = 1;
                // number of whole 32-bit words contained in `m_data`
                transferRequest.elementCount = (m_data.size() * sizeof(uint16_t)) / sizeof(uint32_t);
                transferRequest.buffer = asset::SBufferBinding<video::IGPUBuffer> { 0, core::smart_refctd_ptr<video::IGPUBuffer>(m_transferDstBuffer) };
                transferRequest.srcAddressesOffset = IPropertyPool::invalid;
                transferRequest.dstAddressesOffset = IPropertyPool::invalid;

                m_propertyPoolHandler->transferProperties(cmdbuf.get(),
                    asset::SBufferBinding<video::IGPUBuffer>{0, core::smart_refctd_ptr(m_scratchBuffer)},
                    asset::SBufferBinding<video::IGPUBuffer>{0, core::smart_refctd_ptr(m_addressBuffer)},
                    &transferRequest, &transferRequest + 1,
                    m_logger.get(), 0, m_data.size()
                );

                [[maybe_unused]] const auto result = cmdbuf->end();
                assert(result);
            }

            // the value signalled by this submit is the incremented iteration count
            ++m_iteration;
            {
                const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo =
                {
                    .cmdbuf = cmdbuf.get()
                };
                const IQueue::SSubmitInfo::SSemaphoreInfo signalInfo =
                {
                    .semaphore = m_timeline.get(),
                    .value = m_iteration,
                    .stageMask = asset::PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT
                };
                // No wait semaphores: this function blocks on the timeline before it returns,
                // so the previous iteration's submit has already completed when this one is made.
                const IQueue::SSubmitInfo submitInfo = {
                    .waitSemaphores = {},
                    .commandBuffers = {&cmdbufInfo,1},
                    .signalSemaphores = {&signalInfo,1}
                };

                queue->startCapture();
                [[maybe_unused]] const auto statusCode = queue->submit({ &submitInfo,1 });
                queue->endCapture();
                assert(statusCode == IQueue::RESULT::SUCCESS);
            }

            {
                ISemaphore::SWaitInfo infos[1] = {{.semaphore=m_timeline.get(),.value=m_iteration}};
                m_device->blockForSemaphores(infos);

                // Readback: map the destination buffer's memory and check that the copy went through as expected
                // NOTE(review): the mapped range is {bound offset, whole allocation size} and no explicit invalidate is done - confirm both are fine for the memory type in use
                auto mem = m_transferDstBuffer->getBoundMemory();
                void* ptr = mem.memory->map({ mem.offset, mem.memory->getAllocationSize() });
                if (!ptr)
                {
                    assert(false);
                    return;
                }

                const uint16_t* const gpuValues = reinterpret_cast<const uint16_t*>(ptr);
                for (uint32_t i = 0; i < ReadbackCheckCount; i++)
                {
                    const uint16_t actual = gpuValues[i];
                    [[maybe_unused]] const uint16_t expected = m_data[i];
                    std::printf("%i, ", actual);
                    assert(expected == actual);
                }
                std::printf("\n");
                [[maybe_unused]] const bool success = mem.memory->unmap();
                assert(success);
            }
        }
};
|
||
NBL_MAIN_FUNC(PropertyPoolsApp) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import org.DevshGraphicsProgramming.Agent | ||
import org.DevshGraphicsProgramming.BuilderInfo | ||
import org.DevshGraphicsProgramming.IBuilder | ||
|
||
/**
 * Builder whose only real work happens in {@link #build}: it runs `cmake --build` for the target
 * described by `info`. The prepare, test and install stages report success without doing anything.
 */
class CStreamingAndBufferDeviceAddressBuilder extends IBuilder
{
	public CStreamingAndBufferDeviceAddressBuilder(Agent _agent, _info)
	{
		super(_agent, _info)
	}

	/** Nothing to prepare; always succeeds. */
	@Override
	public boolean prepare(Map axisMapping)
	{
		return true
	}

	/**
	 * Builds the target for the configuration and build type found in `axisMapping`
	 * (keys "CONFIGURATION" and "BUILD_TYPE"). Always returns true.
	 */
	@Override
	public boolean build(Map axisMapping)
	{
		IBuilder.CONFIGURATION requestedConfig = axisMapping.get("CONFIGURATION")
		IBuilder.BUILD_TYPE requestedBuildType = axisMapping.get("BUILD_TYPE")

		def nameOfBuildDirectory = getNameOfBuildDirectory(requestedBuildType)
		def nameOfConfig = getNameOfConfig(requestedConfig)

		def buildCommand = "cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v"
		agent.execute(buildCommand)

		return true
	}

	/** No tests are run; always succeeds. */
	@Override
	public boolean test(Map axisMapping)
	{
		return true
	}

	/** Nothing is installed; always succeeds. */
	@Override
	public boolean install(Map axisMapping)
	{
		return true
	}
}
|
||
// Factory entry point of this script: wraps the given agent and info in a new builder instance.
def create(Agent _agent, _info)
{
	def builder = new CStreamingAndBufferDeviceAddressBuilder(_agent, _info)
	return builder
}
|
||
return this |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove files you're not using