diff --git a/31_compute_shader.cpp b/31_compute_shader.cpp new file mode 100644 index 0000000..8182df4 --- /dev/null +++ b/31_compute_shader.cpp @@ -0,0 +1,1185 @@ +#include +#include +#include +#include +#include +#if defined(__INTELLISENSE__) || !defined(USE_CPP20_MODULES) +#include "vulkan/vulkan.hpp" +#include +#include +#else +import vulkan_hpp; +#endif + +#define STB_IMAGE_IMPLEMENTATION +#include +#define TINYOBJLOADER_IMPLEMENTATION +#include +#define GLFW_INCLUDE_VULKAN +#include +// The perspective projection matrix generated by GLM will use the OpenGL depth range of -1.0 to 1.0 +// by default. We need to configure it to use the Vulkan range of 0.0 to 1.0 using the +// GLM_FORCE_DEPTH_ZERO_TO_ONE definition. +#define GLM_FORCE_DEPTH_ZERO_TO_ONE +#define GLM_ENABLE_EXPERIMENTAL +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +constexpr uint32_t WIDTH = 800; +constexpr uint32_t HEIGHT = 600; +constexpr uint32_t PARTICLE_COUNT = 8192; +constexpr int32_t MAX_FRAMES_IN_FLIGHT = 2; +const std::string SHADER_FILE = "shaders/31_shader_compute.spv"; + +const std::vector validationLayers = { + "VK_LAYER_KHRONOS_validation" +}; + +#ifdef NDEBUG +constexpr bool enableValidationLayers = false; +#else +constexpr bool enableValidationLayers = true; +#endif + +struct UniformBufferObject { + float deltaTime = 1.0f; +}; + +struct Particle { + glm::vec2 position; + glm::vec2 velocity; + glm::vec4 color; + + static vk::VertexInputBindingDescription getBindingDescription() { + return {0, sizeof(Particle), vk::VertexInputRate::eVertex}; + } + + static std::array getAttributeDescriptions() { + return { + vk::VertexInputAttributeDescription{0, 0, vk::Format::eR32G32Sfloat, offsetof(Particle, position)}, + vk::VertexInputAttributeDescription{1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(Particle, color)}, + }; + } +}; + +static std::vector readFile(const std::string &filename) { + 
std::ifstream file(filename, std::ios::ate | std::ios::binary); + if (!file.is_open()) { + throw std::runtime_error("failed to open file!"); + } + + std::vector buffer(file.tellg()); + file.seekg(0, std::ios::beg); + file.read(buffer.data(), static_cast(buffer.size())); + file.close(); + + return buffer; +} + +class ComputeShaderApplication { + public: + void run() { + initWindow(); + initVulkan(); + mainLoop(); + cleanup(); + } + private: + void initWindow() { + glfwInit(); + // Don't create an OpenGL context + glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); + glfwWindowHint(GLFW_RESIZABLE, GLFW_TRUE); + window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan", nullptr, nullptr); + glfwSetWindowUserPointer(window, this); + glfwSetFramebufferSizeCallback(window, framebufferResizeCallback); + lastTime = glfwGetTime(); + } + static void framebufferResizeCallback(GLFWwindow *window, int width, int height) { + auto app = reinterpret_cast(glfwGetWindowUserPointer(window)); + app->framebufferResized = true; + } + void initVulkan() { + createInstance(); + createSurface(); + pickPhysicalDevice(); + createLogicalDevice(); + createSwapChain(); + createImageViews(); + createComputeDescriptorSetLayout(); + createGraphicsPipeline(); + createComputePipeline(); + createCommandPool(); + createShaderStorageBuffers(); + createUniformBuffers(); + createDescriptorPool(); + createComputeDescriptorSets(); + createCommandBuffers(); + createComputeCommandBuffers(); + createSyncObjects(); + } + void mainLoop() { + while (!glfwWindowShouldClose(window)) { + glfwPollEvents(); + drawFrame(); + // We want to animate the particle system using the last frames time to get smooth, frame-rate independent animation + double currentTime = glfwGetTime(); + lastFrameTime = (currentTime - lastTime) * 1000.0; + lastTime = currentTime; + } + + device.waitIdle(); + } + void drawFrame() { + vk::Result result; + uint32_t imageIndex; + try { + std::tie(result, imageIndex) = swapChain.acquireNextImage(UINT64_MAX, 
nullptr, *drawFences[frameIndex]); + + if (result == vk::Result::eErrorOutOfDateKHR) { + recreateSwapChain(); + return; + } + if (result != vk::Result::eSuccess && result != vk::Result::eSuboptimalKHR) { + throw std::runtime_error("failed to acquire swap chain image!"); + } + + auto fenceResult = device.waitForFences(*drawFences[frameIndex], vk::True, UINT64_MAX); + if (fenceResult != vk::Result::eSuccess) { + throw std::runtime_error("failed to wait for fence!"); + } + } catch (const vk::SystemError &e) { + if (e.code().value() == static_cast(vk::Result::eErrorOutOfDateKHR)) { + recreateSwapChain(); + return; + } else { + throw; + } + } + + device.resetFences(*drawFences[frameIndex]); + + // Update timeline value for this frame + uint64_t computeWaitValue = timelineValue; + uint64_t computeSignalValue = ++timelineValue; + uint64_t graphicsWaitValue = computeSignalValue; + uint64_t graphicsSignalValue = ++timelineValue; + + updateUniformBuffer(frameIndex); + + { + recordComputeCommandBuffer(); + + // Submit compute work + vk::TimelineSemaphoreSubmitInfo computeTimelineInfo = { + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &computeWaitValue, + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = &computeSignalValue, + }; + vk::PipelineStageFlags waitStages[] = {vk::PipelineStageFlagBits::eComputeShader}; + vk::SubmitInfo computeSubmitInfo = { + .pNext = &computeTimelineInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &*semaphore, + .pWaitDstStageMask = waitStages, + .commandBufferCount = 1, + .pCommandBuffers = &*computeCommandBuffers[frameIndex], + .signalSemaphoreCount = 1, + .pSignalSemaphores = &*semaphore, + }; + + queue.submit(computeSubmitInfo, nullptr); + } + + { + recordCommandBuffer(imageIndex); + + // Submit graphics work (waits for compute to finish) + vk::PipelineStageFlags waitStage = vk::PipelineStageFlagBits::eVertexInput; + vk::TimelineSemaphoreSubmitInfo graphicsTimelineInfo = { + .waitSemaphoreValueCount = 1, + 
.pWaitSemaphoreValues = &graphicsWaitValue, + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = &graphicsSignalValue, + }; + const vk::SubmitInfo graphicsSubmitInfo = { + .pNext = &graphicsTimelineInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &*semaphore, + .pWaitDstStageMask = &waitStage, + .commandBufferCount = 1, + .pCommandBuffers = &*commandBuffers[frameIndex], + .signalSemaphoreCount = 1, + .pSignalSemaphores = &*semaphore, + }; + queue.submit(graphicsSubmitInfo, nullptr); + + // Present the image (wait for graphics to finish) + vk::SemaphoreWaitInfo waitInfo = { + .semaphoreCount = 1, + .pSemaphores = &*semaphore, + .pValues = &graphicsSignalValue, + }; + // Wait for graphics to complete before presenting + auto result = device.waitSemaphores(waitInfo, UINT64_MAX); + if (result != vk::Result::eSuccess) + { + throw std::runtime_error("failed to wait for semaphore!"); + } + + try { + // Presentation + vk::PresentInfoKHR presentInfoKHR = { + .waitSemaphoreCount = 0, + .pWaitSemaphores = nullptr, + .swapchainCount = 1, + .pSwapchains = &*swapChain, + .pImageIndices = &imageIndex, + }; + result = queue.presentKHR(presentInfoKHR); + if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR || framebufferResized) { + framebufferResized = false; + recreateSwapChain(); + } else if (result != vk::Result::eSuccess) { + throw std::runtime_error("failed to present swap chain image!"); + } + } catch (const vk::SystemError &e) { + if (e.code().value() == static_cast(vk::Result::eErrorOutOfDateKHR)) { + recreateSwapChain(); + return; + } else { + throw; + } + } + } + + frameIndex = (frameIndex + 1) % MAX_FRAMES_IN_FLIGHT; + } + void cleanup() { + cleanupSwapChain(); + glfwDestroyWindow(window); + glfwTerminate(); + } + void createInstance() { + constexpr vk::ApplicationInfo appInfo { + .pApplicationName = "Hello Triangle", + .applicationVersion = VK_MAKE_VERSION(1, 0, 0), + .pEngineName = "No Engine", + .engineVersion = 
VK_MAKE_VERSION(1, 0, 0), + .apiVersion = vk::ApiVersion14, + }; + + // Get the required layers + std::vector requiredLayers; + if (enableValidationLayers) { + requiredLayers.assign(validationLayers.begin(), validationLayers.end()); + } + + // Check if the required layers are supported by the Vulkan implementation. + auto layerProperties = context.enumerateInstanceLayerProperties(); + if (std::ranges::any_of(requiredLayers, [&layerProperties](auto const& requiredLayer) { + return std::ranges::none_of(layerProperties, + [requiredLayer](auto const& layerProperty) + { return strcmp(layerProperty.layerName, requiredLayer) == 0; }); + })) + { + throw std::runtime_error("One or more required layers are not supported!"); + } + + // Get the required instance extensions from GLFW. + uint32_t glfwExtensionCount = 0; + auto glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); + + // Check if the required GLFW extensions are supported by the Vulkan implementation. + auto extensionProperties = context.enumerateInstanceExtensionProperties(); + for (uint32_t i = 0; i < glfwExtensionCount; ++i) + { + if (std::ranges::none_of(extensionProperties, + [glfwExtension = glfwExtensions[i]](auto const& extensionProperty) + { return strcmp(extensionProperty.extensionName, glfwExtension) == 0; })) + { + throw std::runtime_error("Required GLFW extension not supported: " + std::string(glfwExtensions[i])); + } + } + vk::InstanceCreateInfo createInfo { + .pApplicationInfo = &appInfo, + .enabledLayerCount = static_cast(requiredLayers.size()), + .ppEnabledLayerNames = requiredLayers.data(), + .enabledExtensionCount = glfwExtensionCount, + .ppEnabledExtensionNames = glfwExtensions, + }; + + instance = vk::raii::Instance(context, createInfo); + } + void createSurface() { + VkSurfaceKHR _surface; + if (glfwCreateWindowSurface(*instance, window, nullptr, &_surface) != 0) { + throw std::runtime_error("failed to create window surface!"); + } + + surface = 
vk::raii::SurfaceKHR(instance, _surface);
+    }
+    // Selects the first physical device that
+    //   * reports an API version of at least Vulkan 1.3,
+    //   * exposes a queue family with graphics support, and
+    //   * supports every device extension that createLogicalDevice() enables.
+    // Stores the choice in physicalDevice.
+    // Throws std::runtime_error if no device is enumerated or none of them qualifies.
+    void pickPhysicalDevice() {
+        // Must stay in sync with the extension list passed to the device in createLogicalDevice();
+        // requesting an extension the device lacks would make device creation fail later.
+        const std::vector<const char *> deviceExtensions = {
+            vk::KHRSwapchainExtensionName,
+            vk::KHRSpirv14ExtensionName,
+            vk::KHRSynchronization2ExtensionName,
+            vk::KHRCreateRenderpass2ExtensionName,
+        };
+
+        auto devices = instance.enumeratePhysicalDevices();
+        if (devices.empty()) {
+            throw std::runtime_error("failed to find GPUs with Vulkan support!");
+        }
+
+        for (const auto &device : devices) {
+            const bool supportsVulkan13 = device.getProperties().apiVersion >= VK_API_VERSION_1_3;
+
+            const auto queueFamilies = device.getQueueFamilyProperties();
+            const bool hasGraphicsQueue = std::ranges::any_of(queueFamilies, [](auto const &qfp) {
+                return static_cast<bool>(qfp.queueFlags & vk::QueueFlagBits::eGraphics);
+            });
+
+            const auto extensions = device.enumerateDeviceExtensionProperties();
+            const bool hasAllExtensions = std::ranges::all_of(deviceExtensions, [&extensions](const char *required) {
+                return std::ranges::any_of(extensions, [required](auto const &ext) {
+                    return strcmp(ext.extensionName, required) == 0;
+                });
+            });
+
+            if (supportsVulkan13 && hasGraphicsQueue && hasAllExtensions) {
+                physicalDevice = device;
+                return;
+            }
+        }
+
+        // Only give up once every enumerated device has been examined.
+        throw std::runtime_error("failed to find a suitable GPU");
+    }
+    void createLogicalDevice() {
+        std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties();
+        graphicsComputeQueueIndex = findQueueFamilies(physicalDevice);
+        float queuePriority = 0.5f;
+        vk::DeviceQueueCreateInfo deviceQueueCreateInfo {
+            .queueFamilyIndex = graphicsComputeQueueIndex,
+            .queueCount = 1,
+            .pQueuePriorities = &queuePriority,
+        };
+
+        // Create a chain of feature structures
+        vk::StructureChain featureChain = {
+            {.features = {
+                .sampleRateShading = true,
+                .samplerAnisotropy = true,
+            }}, // vk::PhysicalDeviceFeatures2
+ {.synchronization2 = true, + .dynamicRendering = true}, // Enable dynamic rendering and synchronization2 from Vulkan 1.3 + {.timelineSemaphore = true}, // Enable timeline semaphores from Vulkan 1.2 + {.shaderDrawParameters = true}, // Enable shader draw parameters from Vulkan 1.1 + {.extendedDynamicState = true} // Enable extended dynamic state from the extension + }; + + std::vector deviceExtensions = { + vk::KHRSwapchainExtensionName, + vk::KHRSpirv14ExtensionName, + vk::KHRSynchronization2ExtensionName, + vk::KHRCreateRenderpass2ExtensionName + }; + + vk::DeviceCreateInfo deviceCreateInfo { + .pNext = &featureChain.get(), + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &deviceQueueCreateInfo, + .enabledExtensionCount = static_cast(deviceExtensions.size()), + .ppEnabledExtensionNames = deviceExtensions.data(), + }; + + device = vk::raii::Device(physicalDevice, deviceCreateInfo); + queue = vk::raii::Queue(device, graphicsComputeQueueIndex, 0); + } + void createSwapChain() { + auto surfaceCapabilities = physicalDevice.getSurfaceCapabilitiesKHR(surface); + swapChainSurfaceFormat = chooseSwapSurfaceFormat(physicalDevice.getSurfaceFormatsKHR(surface)); + swapChainExtent = chooseSwapExtent(surfaceCapabilities); + auto minImageCount = std::max(3u, surfaceCapabilities.minImageCount); + minImageCount = (surfaceCapabilities.maxImageCount > 0 && + minImageCount > surfaceCapabilities.maxImageCount) ? 
+ surfaceCapabilities.maxImageCount : + minImageCount; + + vk::SwapchainCreateInfoKHR swapChainCreateInfo { + .flags = vk::SwapchainCreateFlagsKHR(), + .surface = surface, + .minImageCount = minImageCount, + .imageFormat = swapChainSurfaceFormat.format, + .imageColorSpace = swapChainSurfaceFormat.colorSpace, + .imageExtent = swapChainExtent, + .imageArrayLayers = 1, + .imageUsage = vk::ImageUsageFlagBits::eColorAttachment, + .imageSharingMode = vk::SharingMode::eExclusive, + .preTransform = surfaceCapabilities.currentTransform, + .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque, + .presentMode = chooseSwapPresentMode(physicalDevice.getSurfacePresentModesKHR(surface)), + .clipped = true, + .oldSwapchain = nullptr, + }; + + swapChain = vk::raii::SwapchainKHR(device, swapChainCreateInfo); + swapChainImages = swapChain.getImages(); + swapChainImageFormat = swapChainSurfaceFormat.format; + } + void createImageViews() { + swapChainImageViews.clear(); + + vk::ImageViewCreateInfo imageViewCreateInfo{ + .viewType = vk::ImageViewType::e2D, + .format = swapChainImageFormat, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + } + }; + + for (auto image : swapChainImages) { + imageViewCreateInfo.image = image; + swapChainImageViews.emplace_back(vk::raii::ImageView(device, imageViewCreateInfo)); + } + } + void createComputeDescriptorSetLayout() { + std::array bindings = { + vk::DescriptorSetLayoutBinding{0, vk::DescriptorType::eUniformBuffer, + 1, vk::ShaderStageFlagBits::eCompute, nullptr}, + vk::DescriptorSetLayoutBinding{1, vk::DescriptorType::eStorageBuffer, + 1, vk::ShaderStageFlagBits::eCompute, nullptr}, + vk::DescriptorSetLayoutBinding{2, vk::DescriptorType::eStorageBuffer, + 1, vk::ShaderStageFlagBits::eCompute, nullptr}, + }; + vk::DescriptorSetLayoutCreateInfo layoutInfo{ + .bindingCount = bindings.size(), + .pBindings = bindings.data(), + }; + 
computeDescriptorSetLayout = vk::raii::DescriptorSetLayout(device, layoutInfo); + } + void createGraphicsPipeline() { + vk::raii::ShaderModule shaderModule = createShaderModule(readFile(SHADER_FILE)); + + vk::PipelineShaderStageCreateInfo vertShaderStageInfo = { + .stage = vk::ShaderStageFlagBits::eVertex, + .module = shaderModule, + .pName = "vertMain", + }; + vk::PipelineShaderStageCreateInfo fragShaderStageInfo = { + .stage = vk::ShaderStageFlagBits::eFragment, + .module = shaderModule, + .pName = "fragMain", + }; + + vk::PipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; + + // Particles input + auto bindingDescription = Particle::getBindingDescription(); + auto attributeDescriptions = Particle::getAttributeDescriptions(); + vk::PipelineVertexInputStateCreateInfo vertexInputInfo{ + .vertexBindingDescriptionCount = 1, + .pVertexBindingDescriptions = &bindingDescription, + .vertexAttributeDescriptionCount = attributeDescriptions.size(), + .pVertexAttributeDescriptions = attributeDescriptions.data(), + }; + + // Input assembly + vk::PipelineInputAssemblyStateCreateInfo inputAssembly = { + .topology = vk::PrimitiveTopology::ePointList, + .primitiveRestartEnable = vk::False, + }; + + // Dynamic state + std::vector dynamicStates = { + vk::DynamicState::eViewport, + vk::DynamicState::eScissor, + }; + vk::PipelineDynamicStateCreateInfo dynamicState = { + .dynamicStateCount = static_cast(dynamicStates.size()), + .pDynamicStates = dynamicStates.data(), + }; + // No need to specify viewport and scissor because they will be specified dynamically + vk::PipelineViewportStateCreateInfo viewportState = { + .viewportCount = 1, + .scissorCount = 1, + }; + + // Rasterisation + vk::PipelineRasterizationStateCreateInfo rasterizer = { + .depthClampEnable = vk::False, + .rasterizerDiscardEnable = vk::False, + .polygonMode = vk::PolygonMode::eFill, + .cullMode = vk::CullModeFlagBits::eBack, + .frontFace = vk::FrontFace::eCounterClockwise, + 
.depthBiasEnable = vk::False, + .depthBiasSlopeFactor = 1.0f, + .lineWidth = 1.0f + }; + + // Multisampling + vk::PipelineMultisampleStateCreateInfo multisampling = { + .rasterizationSamples = vk::SampleCountFlagBits::e1, + .sampleShadingEnable = vk::False, + }; + + // Color blending + vk::PipelineColorBlendAttachmentState colorBlendAttachment = { + .blendEnable = vk::True, + .srcColorBlendFactor = vk::BlendFactor::eSrcAlpha, + .dstColorBlendFactor = vk::BlendFactor::eOneMinusSrcAlpha, + .colorBlendOp = vk::BlendOp::eAdd, + .srcAlphaBlendFactor = vk::BlendFactor::eOneMinusSrcAlpha, + .dstAlphaBlendFactor = vk::BlendFactor::eZero, + .alphaBlendOp = vk::BlendOp::eAdd, + .colorWriteMask = vk::ColorComponentFlagBits::eR | + vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | + vk::ColorComponentFlagBits::eA, + }; + vk::PipelineColorBlendStateCreateInfo colorBlending = { + .logicOpEnable = vk::False, + .logicOp = vk::LogicOp::eCopy, + .attachmentCount = 1, + .pAttachments = &colorBlendAttachment, + }; + + // Pipeline layout + vk::PipelineLayoutCreateInfo pipelineLayoutInfo{}; + graphicsPipelineLayout = vk::raii::PipelineLayout(device, pipelineLayoutInfo); + + // Dynamic rendering pipeline + vk::PipelineRenderingCreateInfo pipelineRenderingCreateInfo = { + .colorAttachmentCount = 1, + .pColorAttachmentFormats = &swapChainImageFormat, + }; + vk::GraphicsPipelineCreateInfo pipelineInfo = { + .pNext = &pipelineRenderingCreateInfo, + .stageCount = 2, + .pStages = shaderStages, + .pVertexInputState = &vertexInputInfo, + .pInputAssemblyState = &inputAssembly, + .pViewportState = &viewportState, + .pRasterizationState = &rasterizer, + .pMultisampleState = &multisampling, + .pColorBlendState = &colorBlending, + .pDynamicState = &dynamicState, + .layout = graphicsPipelineLayout, + .renderPass = nullptr, + }; + + // Create pipeline + graphicsPipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); + } + void createComputePipeline() { + vk::raii::ShaderModule 
shaderModule = createShaderModule(readFile(SHADER_FILE)); + + vk::PipelineShaderStageCreateInfo computeShaderStageInfo = { + .stage = vk::ShaderStageFlagBits::eCompute, + .module = shaderModule, + .pName = "compMain", + }; + vk::PipelineLayoutCreateInfo layoutInfo = { + .setLayoutCount = 1, + .pSetLayouts = &*computeDescriptorSetLayout, + }; + computePipelineLayout = vk::raii::PipelineLayout(device, layoutInfo); + + vk::ComputePipelineCreateInfo pipelineInfo = { + .stage = computeShaderStageInfo, + .layout = *computePipelineLayout, + }; + computePipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); + } + void createCommandPool() { + vk::CommandPoolCreateInfo poolInfo = { + .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, + .queueFamilyIndex = graphicsComputeQueueIndex, + }; + commandPool = vk::raii:: CommandPool(device, poolInfo); + } + void createShaderStorageBuffers() { + std::default_random_engine rndEngine(static_cast(time(nullptr))); + std::uniform_real_distribution rndDist(0.0f, 1.0f); + + // Initialise particle positions on a circle + std::vector particles(PARTICLE_COUNT); + for (auto &particle : particles) { + float r = 0.25f * sqrtf(rndDist(rndEngine)); + float theta = rndDist(rndEngine) * 2.0f * 3.14159265358979323846f; + float x = r * cosf(theta) * HEIGHT / WIDTH; + float y = r * sinf(theta); + particle.position = glm::vec2(x, y); + particle.velocity = normalize(glm::vec2(x, y)) * 0.00025f; + particle.color = glm::vec4(rndDist(rndEngine), rndDist(rndEngine), rndDist(rndEngine), 1.0f); + } + + vk::DeviceSize bufferSize = sizeof(Particle) * PARTICLE_COUNT; + + // Create a staging buffer used to upload data to the gpu + vk::raii::Buffer stagingBuffer({}); + vk::raii::DeviceMemory stagingBufferMemory({}); + createBuffer(bufferSize, vk::BufferUsageFlagBits::eTransferSrc, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, + stagingBuffer, stagingBufferMemory); + + void *dataStaging = 
stagingBufferMemory.mapMemory(0, bufferSize); + memcpy(dataStaging, particles.data(), static_cast(bufferSize)); + stagingBufferMemory.unmapMemory(); + + shaderStorageBuffers.clear(); + shaderStorageBuffersMemory.clear(); + + for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + vk::raii::Buffer shaderStorageBufferTemp({}); + vk::raii::DeviceMemory shaderStorageBufferTempMemory({}); + createBuffer(bufferSize, vk::BufferUsageFlagBits::eStorageBuffer | + vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eTransferDst, + vk::MemoryPropertyFlagBits::eDeviceLocal, shaderStorageBufferTemp, + shaderStorageBufferTempMemory); + copyBuffer(stagingBuffer, shaderStorageBufferTemp, bufferSize); + shaderStorageBuffers.emplace_back(std::move(shaderStorageBufferTemp)); + shaderStorageBuffersMemory.emplace_back(std::move(shaderStorageBufferTempMemory)); + } + } + void createUniformBuffers() { + uniformBuffers.clear(); + uniformBuffersMemory.clear(); + unifromBuffersMapped.clear(); + + for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + vk::DeviceSize bufferSize = sizeof(UniformBufferObject); + vk::raii::Buffer buffer({}); + vk::raii::DeviceMemory bufferMem({}); + createBuffer(bufferSize, vk::BufferUsageFlagBits::eUniformBuffer, + vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, + buffer, bufferMem); + uniformBuffers.emplace_back(std::move(buffer)); + uniformBuffersMemory.emplace_back(std::move(bufferMem)); + unifromBuffersMapped.emplace_back(uniformBuffersMemory[i].mapMemory(0, bufferSize)); + } + } + void createDescriptorPool() { + std::array poolSizes = { + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, MAX_FRAMES_IN_FLIGHT}, + vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, MAX_FRAMES_IN_FLIGHT * 2}, + }; + vk::DescriptorPoolCreateInfo poolInfo = { + .flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, + .maxSets = MAX_FRAMES_IN_FLIGHT, + .poolSizeCount = poolSizes.size(), + .pPoolSizes = 
poolSizes.data(),
+        };
+        descriptorPool = vk::raii::DescriptorPool(device, poolInfo);
+    }
+    // Allocates one compute descriptor set per frame in flight and fills in its three bindings:
+    //   binding 0 - the frame's uniform buffer (UniformBufferObject),
+    //   binding 1 - the particle storage buffer of the PREVIOUS frame,
+    //   binding 2 - the particle storage buffer of the CURRENT frame.
+    void createComputeDescriptorSets() {
+        std::vector<vk::DescriptorSetLayout> layouts(MAX_FRAMES_IN_FLIGHT, computeDescriptorSetLayout);
+        vk::DescriptorSetAllocateInfo allocInfo {
+            .descriptorPool = *descriptorPool,
+            .descriptorSetCount = MAX_FRAMES_IN_FLIGHT,
+            .pSetLayouts = layouts.data(),
+        };
+        // Drop existing sets before allocating new ones. NOTE(review): the pool is created with
+        // maxSets == MAX_FRAMES_IN_FLIGHT, so this presumably has to happen first - confirm.
+        computeDescriptorSets.clear();
+        computeDescriptorSets = device.allocateDescriptorSets(allocInfo);
+        for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) {
+            // Index of the frame before i, wrapping from 0 to the last frame. Adding the frame
+            // count before subtracting keeps the unsigned arithmetic from underflowing, so the
+            // result is correct for any frame count, not only powers of two.
+            const size_t previousFrame = (i + MAX_FRAMES_IN_FLIGHT - 1) % MAX_FRAMES_IN_FLIGHT;
+
+            vk::DescriptorBufferInfo bufferInfo {
+                .buffer = uniformBuffers[i],
+                .offset = 0,
+                .range = sizeof(UniformBufferObject),
+            };
+            vk::DescriptorBufferInfo storageBufferInfoLastFrame {
+                .buffer = shaderStorageBuffers[previousFrame],
+                .offset = 0,
+                .range = sizeof(Particle) * PARTICLE_COUNT,
+            };
+            vk::DescriptorBufferInfo storageBufferInfoCurrentFrame {
+                .buffer = shaderStorageBuffers[i],
+                .offset = 0,
+                .range = sizeof(Particle) * PARTICLE_COUNT,
+            };
+
+            std::array descriptorWrites = {
+                vk::WriteDescriptorSet{
+                    .dstSet = computeDescriptorSets[i],
+                    .dstBinding = 0,
+                    .dstArrayElement = 0,
+                    .descriptorCount = 1,
+                    .descriptorType = vk::DescriptorType::eUniformBuffer,
+                    .pBufferInfo = &bufferInfo,
+                },
+                vk::WriteDescriptorSet{
+                    .dstSet = computeDescriptorSets[i],
+                    .dstBinding = 1,
+                    .dstArrayElement = 0,
+                    .descriptorCount = 1,
+                    .descriptorType = vk::DescriptorType::eStorageBuffer,
+                    .pBufferInfo = &storageBufferInfoLastFrame,
+                },
+                vk::WriteDescriptorSet{
+                    .dstSet = computeDescriptorSets[i],
+                    .dstBinding = 2,
+                    .dstArrayElement = 0,
+                    .descriptorCount = 1,
+                    .descriptorType = vk::DescriptorType::eStorageBuffer,
+                    .pBufferInfo = &storageBufferInfoCurrentFrame,
+                }
+            };
+
+            device.updateDescriptorSets(descriptorWrites, {});
+        }
+    }
+    void createCommandBuffers() {
+        commandBuffers.clear();
+        vk::CommandBufferAllocateInfo allocInfo = {
+            .commandPool = commandPool,
+            .level = vk::CommandBufferLevel::ePrimary,
+
.commandBufferCount = MAX_FRAMES_IN_FLIGHT, + }; + commandBuffers = vk::raii::CommandBuffers(device, allocInfo); + } + void createComputeCommandBuffers() { + computeCommandBuffers.clear(); + vk::CommandBufferAllocateInfo allocInfo = { + .commandPool = commandPool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = MAX_FRAMES_IN_FLIGHT, + }; + computeCommandBuffers = vk::raii::CommandBuffers(device, allocInfo); + } + void createSyncObjects() { + drawFences.clear(); + + vk::SemaphoreTypeCreateInfo semaphoreType { + .semaphoreType = vk::SemaphoreType::eTimeline, + .initialValue = 0, + }; + semaphore = vk::raii::Semaphore(device, {.pNext = &semaphoreType}); + timelineValue = 0; + + for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { + vk::FenceCreateInfo fenceInfo {}; + drawFences.emplace_back(device, fenceInfo); + } + } + void copyBufferToImage(const vk::raii::Buffer& buffer, vk::raii::Image& image, uint32_t width, uint32_t height) { + vk::raii::CommandBuffer commandBuffer = beginSingleTimeCommands(); + vk::BufferImageCopy region { + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource = { vk::ImageAspectFlagBits::eColor, 0, 0, 1 }, + .imageOffset = { 0, 0, 0 }, + .imageExtent = { width, height, 1 } + }; + commandBuffer.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, {region}); + endSingleTimeCommands(commandBuffer); + } + void updateUniformBuffer(uint32_t currentImage) { + UniformBufferObject ubo { + .deltaTime = static_cast(lastFrameTime) * 2.0f, + }; + memcpy(unifromBuffersMapped[currentImage], &ubo, sizeof(ubo)); + } + void recordCommandBuffer(uint32_t imageIndex) { + auto &commandBuffer = commandBuffers[frameIndex]; + + // Begin recording the command buffer + commandBuffer.begin({}); + + // Before starting rendering, transition the swapchain image to COLOR_ATTACHMENT_OPTIMAL + transitionRenderingImageLayout( + swapChainImages[imageIndex], + vk::ImageLayout::eUndefined, + 
vk::ImageLayout::eColorAttachmentOptimal, + {}, // srcAccessMask (no need to wait for previous operations) + vk::AccessFlagBits2::eColorAttachmentWrite, // dstAccessMask + vk::PipelineStageFlagBits2::eColorAttachmentOutput, // srcStage + vk::PipelineStageFlagBits2::eColorAttachmentOutput, // dstStage + vk::ImageAspectFlagBits::eColor // aspectFlags + ); + + vk::ClearValue clearColor = vk::ClearColorValue(0.0f, 0.0f, 0.0f, 1.0f); + vk::RenderingAttachmentInfo colorAttachmentInfo = { + .imageView = swapChainImageViews[imageIndex], + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = vk::AttachmentLoadOp::eClear, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = clearColor, + }; + vk::RenderingInfo renderingInfo = { + .renderArea = { .offset = { 0, 0 }, .extent = swapChainExtent }, + .layerCount = 1, + .colorAttachmentCount = 1, + .pColorAttachments = &colorAttachmentInfo, + }; + + commandBuffer.beginRendering(renderingInfo); + commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, graphicsPipeline); + commandBuffer.bindVertexBuffers(0, {shaderStorageBuffers[frameIndex]}, {0}); + // Viewport and scissor are dynamic so we need to set them + vk::Viewport viewport = { + .x = 0.0f, + .y = 0.0f, + .width = static_cast(swapChainExtent.width), + .height = static_cast(swapChainExtent.height), + .minDepth = 0.0f, + .maxDepth = 1.0f, + }; + commandBuffer.setViewport(0, viewport); + commandBuffer.setScissor(0, vk::Rect2D(vk::Offset2D(0, 0), swapChainExtent)); + + // Issue the draw command + commandBuffer.draw(PARTICLE_COUNT, 1, 0, 0); + + commandBuffer.endRendering(); + + // After rendering, transition the swapchain image to PRESENT_SRC + transitionRenderingImageLayout( + swapChainImages[imageIndex], + vk::ImageLayout::eColorAttachmentOptimal, + vk::ImageLayout::ePresentSrcKHR, + vk::AccessFlagBits2::eColorAttachmentWrite, // srcAccessMask + {}, // dstAccessMask + vk::PipelineStageFlagBits2::eColorAttachmentOutput, // srcStage + 
vk::PipelineStageFlagBits2::eBottomOfPipe, // dstStage + vk::ImageAspectFlagBits::eColor // aspectFlags + ); + + // Finish recording the command buffer + commandBuffer.end(); + } + void recordComputeCommandBuffer() { + auto &commandBuffer = computeCommandBuffers[frameIndex]; + commandBuffer.begin({}); + commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute, computePipeline); + commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, computePipelineLayout, + 0, {computeDescriptorSets[frameIndex]}, {}); + commandBuffer.dispatch(PARTICLE_COUNT / 256, 1, 1); + commandBuffer.end(); + } + void createBuffer( + vk::DeviceSize size, + vk::BufferUsageFlags usage, + vk::MemoryPropertyFlags properties, + vk::raii::Buffer &buffer, + vk::raii::DeviceMemory &bufferMemory + ) { + vk::BufferCreateInfo bufferInfo = { + .size = size, + .usage = usage, + .sharingMode = vk::SharingMode::eExclusive, + }; + buffer = vk::raii::Buffer(device, bufferInfo); + + vk::MemoryRequirements memRequirements = buffer.getMemoryRequirements(); + vk::MemoryAllocateInfo memAllocateInfo = { + .allocationSize = memRequirements.size, + .memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties) + }; + bufferMemory = vk::raii::DeviceMemory(device, memAllocateInfo); + buffer.bindMemory(bufferMemory, 0); + } + void copyBuffer(vk::raii::Buffer &srcBuffer, vk::raii::Buffer &dstBuffer, vk::DeviceSize size) { + vk::raii::CommandBuffer commandCopyBuffer = beginSingleTimeCommands(); + commandCopyBuffer.copyBuffer(*srcBuffer, *dstBuffer, vk::BufferCopy(0, 0, size)); + endSingleTimeCommands(commandCopyBuffer); + } + vk::raii::CommandBuffer beginSingleTimeCommands() { + vk::CommandBufferAllocateInfo allocInfo { + .commandPool = commandPool, + .level = vk::CommandBufferLevel::ePrimary, + .commandBufferCount = 1 + }; + vk::raii::CommandBuffer commandBuffer = std::move(device.allocateCommandBuffers(allocInfo).front()); + + vk::CommandBufferBeginInfo beginInfo{ .flags = 
vk::CommandBufferUsageFlagBits::eOneTimeSubmit };
+        commandBuffer.begin(beginInfo);
+
+        return commandBuffer;
+    }
+    // Returns true for every format that carries a stencil aspect: the combined depth/stencil
+    // formats and the stencil-only format.
+    bool hasStencilComponent(vk::Format format) {
+        return format == vk::Format::eD32SfloatS8Uint ||
+               format == vk::Format::eD24UnormS8Uint ||
+               format == vk::Format::eD16UnormS8Uint ||
+               format == vk::Format::eS8Uint;
+    }
+    // Returns the first candidate whose linear or optimal tiling features (selected by `tiling`)
+    // include all of `features`. Throws std::runtime_error if no candidate qualifies.
+    vk::Format findSupportedFormat(const std::vector<vk::Format>& candidates, vk::ImageTiling tiling,
+                                   vk::FormatFeatureFlags features) {
+        for (const auto format : candidates) {
+            vk::FormatProperties props = physicalDevice.getFormatProperties(format);
+            if (tiling == vk::ImageTiling::eLinear && (props.linearTilingFeatures & features) == features) {
+                return format;
+            }
+            if (tiling == vk::ImageTiling::eOptimal && (props.optimalTilingFeatures & features) == features) {
+                return format;
+            }
+        }
+
+        throw std::runtime_error("failed to find supported format!");
+    }
+    // Ends recording, submits the command buffer to the queue and blocks until the queue is idle.
+    void endSingleTimeCommands(vk::raii::CommandBuffer& commandBuffer) {
+        commandBuffer.end();
+
+        vk::SubmitInfo submitInfo{ .commandBufferCount = 1, .pCommandBuffers = &*commandBuffer };
+        queue.submit(submitInfo, nullptr);
+        queue.waitIdle();
+    }
+    // Returns the index of the first memory type that is allowed by the `typeFilter` bit mask
+    // (bit i set means memory type i is acceptable) and has all requested property flags.
+    // Throws std::runtime_error if there is none.
+    uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties) {
+        vk::PhysicalDeviceMemoryProperties memProperties = physicalDevice.getMemoryProperties();
+        for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
+            // Shift an unsigned one: the mask is unsigned and bit 31 is a legal memory type index.
+            const bool allowedByFilter = (typeFilter & (1u << i)) != 0;
+            const bool hasProperties = (memProperties.memoryTypes[i].propertyFlags & properties) == properties;
+            if (allowedByFilter && hasProperties) {
+                return i;
+            }
+        }
+
+        throw std::runtime_error("failed to find suitable memory type!");
+    }
+    void transitionImageLayout(const vk::raii::Image &image, vk::ImageLayout oldLayout,
+                               vk::ImageLayout newLayout, uint32_t mipLevels) {
+        vk::raii::CommandBuffer commandBuffer = beginSingleTimeCommands();
+
+        vk::ImageMemoryBarrier barrier{
+            .oldLayout = oldLayout,
+            .newLayout = newLayout,
+            .image = image,
+            .subresourceRange = {
+                .aspectMask = vk::ImageAspectFlagBits::eColor,
+                .baseMipLevel = 0,
+                .levelCount = mipLevels,
+                .baseArrayLayer = 0,
+                .layerCount = 1,
+            }
+        };
+
+
vk::PipelineStageFlags sourceStage; + vk::PipelineStageFlags destinationStage; + + /* Only two transitions are handled: undefined to transfer-dst, and transfer-dst to shader-read-only. Any other pair throws std::invalid_argument. */ if (oldLayout == vk::ImageLayout::eUndefined && newLayout == vk::ImageLayout::eTransferDstOptimal) + { + barrier.srcAccessMask = {}; + barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite; + + sourceStage = vk::PipelineStageFlagBits::eTopOfPipe; + destinationStage = vk::PipelineStageFlagBits::eTransfer; + } + else if (oldLayout == vk::ImageLayout::eTransferDstOptimal && newLayout == vk::ImageLayout::eShaderReadOnlyOptimal) + { + barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite; + barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead; + + sourceStage = vk::PipelineStageFlagBits::eTransfer; + destinationStage = vk::PipelineStageFlagBits::eFragmentShader; + } + else + { + throw std::invalid_argument("unsupported layout transition!"); + } + + commandBuffer.pipelineBarrier(sourceStage, destinationStage, {}, {}, nullptr, barrier); + endSingleTimeCommands(commandBuffer); + } + /* Records a single vk::ImageMemoryBarrier2 into commandBuffers[frameIndex] via pipelineBarrier2. The barrier moves mip level 0 / array layer 0 of `image` (aspect given by aspectFlags) from oldLayout to newLayout with the supplied stage and access masks; both queue family indices are VK_QUEUE_FAMILY_IGNORED. */ void transitionRenderingImageLayout( + vk::Image image, + vk::ImageLayout oldLayout, + vk::ImageLayout newLayout, + vk::AccessFlags2 srcAccessMask, + vk::AccessFlags2 dstAccessMask, + vk::PipelineStageFlags2 srcStageMask, + vk::PipelineStageFlags2 dstStageMask, + vk::ImageAspectFlags aspectFlags + ) { + vk::ImageMemoryBarrier2 barrier = { + .srcStageMask = srcStageMask, + .srcAccessMask = srcAccessMask, + .dstStageMask = dstStageMask, + .dstAccessMask = dstAccessMask, + .oldLayout = oldLayout, + .newLayout = newLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = { + .aspectMask = aspectFlags, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1 + } + }; + vk::DependencyInfo dependencyInfo = { + .dependencyFlags = {}, + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &barrier + }; + commandBuffers[frameIndex].pipelineBarrier2(dependencyInfo); + } + /* Clears the swap chain image views and then resets the swap chain handle. */ void cleanupSwapChain() 
{ + swapChainImageViews.clear(); + swapChain = nullptr; + } + /* Rebuilds the swap chain and its image views. While the framebuffer size is 0 in either dimension it blocks in glfwWaitEvents and re-queries the size afterwards, so the loop exits as soon as a non-zero size is reported instead of waiting for one more event. */ void recreateSwapChain() { + int width = 0, height = 0; + glfwGetFramebufferSize(window, &width, &height); + while (width == 0 || height == 0) { + glfwWaitEvents(); + glfwGetFramebufferSize(window, &width, &height); + } + + device.waitIdle(); + cleanupSwapChain(); + createSwapChain(); + createImageViews(); + } + /* Wraps the bytes in `code` in a vk::raii::ShaderModule. codeSize is given in bytes, while pCode is passed as a pointer to 32-bit words. */ [[nodiscard]] vk::raii::ShaderModule createShaderModule(const std::vector<char> &code) const { + vk::ShaderModuleCreateInfo createInfo { + .codeSize = code.size() * sizeof(char), + .pCode = reinterpret_cast<const uint32_t*>(code.data()), + }; + + return vk::raii::ShaderModule{device, createInfo}; + } + /* Returns the eB8G8R8A8Srgb / eSrgbNonlinear entry when present, otherwise the first entry; `availableFormats` must not be empty. */ vk::SurfaceFormatKHR chooseSwapSurfaceFormat(const std::vector<vk::SurfaceFormatKHR>& availableFormats) { + for (const auto& availableFormat : availableFormats) { + if (availableFormat.format == vk::Format::eB8G8R8A8Srgb && availableFormat.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear) { + return availableFormat; + } + } + + return availableFormats[0]; + } + /* Returns eMailbox when it is in `availablePresentModes`, otherwise eFifo. */ vk::PresentModeKHR chooseSwapPresentMode(const std::vector<vk::PresentModeKHR>& availablePresentModes) { + for (const auto& availablePresentMode : availablePresentModes) { + if (availablePresentMode == vk::PresentModeKHR::eMailbox) { + return availablePresentMode; + } + } + + return vk::PresentModeKHR::eFifo; + } + /* Returns capabilities.currentExtent unless its width is the uint32_t maximum; in that case the GLFW framebuffer size is used, clamped to the surface's min/max image extent. */ vk::Extent2D chooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities) { + if (capabilities.currentExtent.width != std::numeric_limits<uint32_t>::max()) { + return capabilities.currentExtent; + } + + int width, height; + glfwGetFramebufferSize(window, &width, &height); + + return { + std::clamp<uint32_t>(width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width), + std::clamp<uint32_t>(height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height), + }; + } + /* Returns the index of the first queue family that supports graphics, compute and presentation to `surface`; throws std::runtime_error when there is none. */ uint32_t findQueueFamilies(vk::raii::PhysicalDevice physicalDevice) { + // find the index of the first queue family that supports graphics + std::vector<vk::QueueFamilyProperties> queueFamilyProperties = physicalDevice.getQueueFamilyProperties(); + + // 
get the first index into queueFamilyProperties which supports graphics, compute and present + uint32_t queueIndex = ~0; + for (uint32_t qfpIndex = 0; qfpIndex < queueFamilyProperties.size(); ++qfpIndex) { + if ((queueFamilyProperties[qfpIndex].queueFlags & vk::QueueFlagBits::eGraphics) && + (queueFamilyProperties[qfpIndex].queueFlags & vk::QueueFlagBits::eCompute) && + physicalDevice.getSurfaceSupportKHR(qfpIndex, *surface)) { + queueIndex = qfpIndex; + break; + } + } + + if (queueIndex == ~0) { + throw std::runtime_error("Could not find a queue for graphics, compute and present -> terminating"); + } + + return queueIndex; + } + /* Returns the largest sample count that is set in both the color and the depth framebuffer sample-count limits, falling back to e1. */ vk::SampleCountFlagBits getMaxUsableSampleCount() { + vk::PhysicalDeviceProperties props = physicalDevice.getProperties(); + vk::SampleCountFlags counts = props.limits.framebufferColorSampleCounts & + props.limits.framebufferDepthSampleCounts; + + if (counts & vk::SampleCountFlagBits::e64) { return vk::SampleCountFlagBits::e64; } + if (counts & vk::SampleCountFlagBits::e32) { return vk::SampleCountFlagBits::e32; } + if (counts & vk::SampleCountFlagBits::e16) { return vk::SampleCountFlagBits::e16; } + if (counts & vk::SampleCountFlagBits::e8) { return vk::SampleCountFlagBits::e8; } + if (counts & vk::SampleCountFlagBits::e4) { return vk::SampleCountFlagBits::e4; } + if (counts & vk::SampleCountFlagBits::e2) { return vk::SampleCountFlagBits::e2; } + + return vk::SampleCountFlagBits::e1; + } + + /* Frame counters, timing values and the window / Vulkan handles held by the application. */ uint32_t frameIndex = 0; + double lastFrameTime = 0.0; + double lastTime = 0.0; + bool framebufferResized = false; + GLFWwindow *window = nullptr; + vk::raii::Context context; + vk::raii::Instance instance = nullptr; + vk::raii::PhysicalDevice physicalDevice = nullptr; + uint32_t graphicsComputeQueueIndex = ~0 ; + vk::raii::Device device = nullptr; + vk::raii::Queue queue = nullptr; + vk::raii::SurfaceKHR surface = nullptr; + vk::raii::SwapchainKHR swapChain = nullptr; + vk::SurfaceFormatKHR swapChainSurfaceFormat; + vk::Extent2D swapChainExtent; + vk::Format 
swapChainImageFormat = vk::Format::eUndefined; + std::vector swapChainImages; + std::vector swapChainImageViews; + vk::raii::DescriptorSetLayout computeDescriptorSetLayout = nullptr; + vk::raii::DescriptorPool descriptorPool = nullptr; + std::vector computeDescriptorSets; + vk::raii::PipelineLayout graphicsPipelineLayout = nullptr; + vk::raii::Pipeline graphicsPipeline = nullptr; + vk::raii::PipelineLayout computePipelineLayout = nullptr; + vk::raii::Pipeline computePipeline = nullptr; + vk::raii::CommandPool commandPool = nullptr; + vk::raii::Semaphore semaphore = nullptr; + uint64_t timelineValue = 0; + std::vector shaderStorageBuffers; + std::vector shaderStorageBuffersMemory; + std::vector uniformBuffers; + std::vector uniformBuffersMemory; + std::vector unifromBuffersMapped; + std::vector commandBuffers; + std::vector computeCommandBuffers; + std::vector drawFences; +}; + +int main() { + ComputeShaderApplication app; + + try { + app.run(); + } catch (const std::exception &e) { + std::cout << e.what() << std::endl; + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/CMakeLists.txt b/CMakeLists.txt index 0afedae..9e8e871 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -106,7 +106,7 @@ endfunction () function (add_slang_shader_target TARGET) cmake_parse_arguments ("SHADER" "" "CHAPTER_NAME" "SOURCES" ${ARGN}) set (SHADERS_DIR ${CHAPTER_NAME}/shaders) - file(GLOB HAS_COMPUTE shaders/${CHAPTER_SHADER}.comp) + file(GLOB HAS_COMPUTE shaders/${CHAPTER_SHADER}.comp.slang) set (ENTRY_POINTS -entry vertMain -entry fragMain) if(HAS_COMPUTE) list(APPEND ENTRY_POINTS -entry compMain) @@ -155,7 +155,7 @@ function (add_chapter CHAPTER_NAME) endif() set (CHAPTER_SHADER_SLANG_TARGET ${CHAPTER_NAME}_slang_shader) - file (GLOB SHADER_SLANG_SOURCES shaders/${CHAPTER_SHADER}.slang) + file (GLOB SHADER_SLANG_SOURCES shaders/${CHAPTER_SHADER}*.slang) if(SHADER_SLANG_SOURCES) add_slang_shader_target( ${CHAPTER_SHADER_SLANG_TARGET} CHAPTER_NAME ${CHAPTER_NAME} 
SOURCES ${SHADER_SLANG_SOURCES}) add_dependencies(${CHAPTER_NAME} ${CHAPTER_SHADER_SLANG_TARGET}) @@ -275,10 +275,10 @@ add_chapter (30_multisampling TEXTURES images/viking_room.png LIBS glm::glm tinyobjloader::tinyobjloader) -# add_chapter (31_compute_shader -# SHADER 31_shader_compute -# LIBS glm::glm) -# +add_chapter (31_compute_shader + SHADER 31_shader_compute + LIBS glm::glm) + # add_chapter (32_ecosystem_utilities # SHADER 27_shader_depth # MODELS models/viking_room.obj diff --git a/shaders/31_shader_compute.comp.slang b/shaders/31_shader_compute.comp.slang new file mode 100644 index 0000000..cd3c7ba --- /dev/null +++ b/shaders/31_shader_compute.comp.slang @@ -0,0 +1,66 @@ +/* Vertex input: a 2D position and an RGB color. */ struct VSInput { + float2 inPosition; + float3 inColor; +}; + +/* Vertex output: clip-space position, point size and color. */ struct VSOutput { + float4 pos : SV_Position; + float pointSize : SV_PointSize; + float3 fragColor : COLOR0; +}; + +/* Fragment input: position, color and the coordinate inside the point. */ struct PSInput { + float4 pos : SV_Position; + float3 fragColor : COLOR0; + float2 pointCoord : SV_PointCoord; +}; + +/* Vertex stage: sets the point size to 14, places the vertex at (inPosition, 1, 1) and forwards the color. */ [shader("vertex")] +VSOutput vertMain(VSInput input) { + VSOutput output; + output.pointSize = 14.0; + output.pos = float4(input.inPosition, 1.0, 1.0); + output.fragColor = input.inColor.rgb; + return output; +} + +/* Fragment stage: outputs the incoming color with alpha = 0.5 minus the distance of the point coordinate from (0.5, 0.5). */ [shader("fragment")] +float4 fragMain(PSInput input) : SV_Target { + float2 coord = input.pointCoord - float2(0.5); + return float4(input.fragColor, 0.5 - length(coord)); +} + +/* One particle: 2D position, 2D velocity and an RGBA color. */ struct Particle { + float2 position; + float2 velocity; + float4 color; +}; + +struct UniformBuffer { + float deltaTime; +}; +ConstantBuffer<UniformBuffer> ubo; + +/* Element type of both particle buffers; it wraps a single Particle. */ struct ParticleSSBO { + Particle particles; +}; +StructuredBuffer<ParticleSSBO> particlesIn; +RWStructuredBuffer<ParticleSSBO> particlesOut; + +/* Compute stage: one thread per particle index (threadId.x). Writes position + velocity * ubo.deltaTime and the copied velocity to particlesOut; the color field of particlesOut is not written here. There is no bounds check on index, so this assumes the dispatch covers exactly the buffer length. TODO confirm against the host-side dispatch. */ [shader("compute")] +[numthreads(256, 1, 1)] +void compMain(uint3 threadId : SV_DispatchThreadID) { + uint index = threadId.x; + + particlesOut[index].particles.position = particlesIn[index].particles.position + + particlesIn[index].particles.velocity.xy * ubo.deltaTime; + particlesOut[index].particles.velocity = particlesIn[index].particles.velocity; + 
+ // Flip movement at window border: when the new position is on or outside the [-1, 1] range on an axis, negate that axis' velocity component in particlesOut + if ((particlesOut[index].particles.position.x <= -1.0) || (particlesOut[index].particles.position.x >= 1.0)) { + particlesOut[index].particles.velocity.x = -particlesOut[index].particles.velocity.x; + } + if ((particlesOut[index].particles.position.y <= -1.0) || (particlesOut[index].particles.position.y >= 1.0)) { + particlesOut[index].particles.velocity.y = -particlesOut[index].particles.velocity.y; + } +}