#include #include #include #include #include #if defined(__INTELLISENSE__) || !defined(USE_CPP20_MODULES) #include "vulkan/vulkan.hpp" #include #include #else import vulkan_hpp; #endif #define STB_IMAGE_IMPLEMENTATION #include #define TINYOBJLOADER_IMPLEMENTATION #include #define GLFW_INCLUDE_VULKAN #include // The perspective projection matrix generated by GLM will use the OpenGL depth range of -1.0 to 1.0 // by default. We need to configure it to use the Vulkan range of 0.0 to 1.0 using the // GLM_FORCE_DEPTH_ZERO_TO_ONE definition. #define GLM_FORCE_DEPTH_ZERO_TO_ONE #define GLM_ENABLE_EXPERIMENTAL #include #include #include #include #include #include #include #include #include #include #include #include #include #include constexpr uint32_t WIDTH = 800; constexpr uint32_t HEIGHT = 600; constexpr uint32_t PARTICLE_COUNT = 8192; constexpr int32_t MAX_FRAMES_IN_FLIGHT = 2; const std::string SHADER_FILE = "shaders/31_shader_compute.spv"; const std::vector validationLayers = { "VK_LAYER_KHRONOS_validation" }; #ifdef NDEBUG constexpr bool enableValidationLayers = false; #else constexpr bool enableValidationLayers = true; #endif struct UniformBufferObject { float deltaTime = 1.0f; }; struct Particle { glm::vec2 position; glm::vec2 velocity; glm::vec4 color; static vk::VertexInputBindingDescription getBindingDescription() { return {0, sizeof(Particle), vk::VertexInputRate::eVertex}; } static std::array getAttributeDescriptions() { return { vk::VertexInputAttributeDescription{0, 0, vk::Format::eR32G32Sfloat, offsetof(Particle, position)}, vk::VertexInputAttributeDescription{1, 0, vk::Format::eR32G32B32A32Sfloat, offsetof(Particle, color)}, }; } }; static std::vector readFile(const std::string &filename) { std::ifstream file(filename, std::ios::ate | std::ios::binary); if (!file.is_open()) { throw std::runtime_error("failed to open file!"); } std::vector buffer(file.tellg()); file.seekg(0, std::ios::beg); file.read(buffer.data(), static_cast(buffer.size())); file.close(); 
return buffer; } class ComputeShaderApplication { public: void run() { initWindow(); initVulkan(); mainLoop(); cleanup(); } private: void initWindow() { glfwInit(); // Don't create an OpenGL context glfwWindowHint(GLFW_CLIENT_API, GLFW_NO_API); glfwWindowHint(GLFW_RESIZABLE, GLFW_TRUE); window = glfwCreateWindow(WIDTH, HEIGHT, "Vulkan", nullptr, nullptr); glfwSetWindowUserPointer(window, this); glfwSetFramebufferSizeCallback(window, framebufferResizeCallback); lastTime = glfwGetTime(); } static void framebufferResizeCallback(GLFWwindow *window, int width, int height) { auto app = reinterpret_cast(glfwGetWindowUserPointer(window)); app->framebufferResized = true; } void initVulkan() { createInstance(); createSurface(); pickPhysicalDevice(); createLogicalDevice(); createSwapChain(); createImageViews(); createComputeDescriptorSetLayout(); createGraphicsPipeline(); createComputePipeline(); createCommandPool(); createShaderStorageBuffers(); createUniformBuffers(); createDescriptorPool(); createComputeDescriptorSets(); createCommandBuffers(); createComputeCommandBuffers(); createSyncObjects(); } void mainLoop() { while (!glfwWindowShouldClose(window)) { glfwPollEvents(); drawFrame(); // We want to animate the particle system using the last frames time to get smooth, frame-rate independent animation double currentTime = glfwGetTime(); lastFrameTime = (currentTime - lastTime) * 1000.0; lastTime = currentTime; } device.waitIdle(); } void drawFrame() { vk::Result result; uint32_t imageIndex; try { std::tie(result, imageIndex) = swapChain.acquireNextImage(UINT64_MAX, nullptr, *drawFences[frameIndex]); if (result == vk::Result::eErrorOutOfDateKHR) { recreateSwapChain(); return; } if (result != vk::Result::eSuccess && result != vk::Result::eSuboptimalKHR) { throw std::runtime_error("failed to acquire swap chain image!"); } auto fenceResult = device.waitForFences(*drawFences[frameIndex], vk::True, UINT64_MAX); if (fenceResult != vk::Result::eSuccess) { throw 
std::runtime_error("failed to wait for fence!"); } } catch (const vk::SystemError &e) { if (e.code().value() == static_cast(vk::Result::eErrorOutOfDateKHR)) { recreateSwapChain(); return; } else { throw; } } device.resetFences(*drawFences[frameIndex]); // Update timeline value for this frame uint64_t computeWaitValue = timelineValue; uint64_t computeSignalValue = ++timelineValue; uint64_t graphicsWaitValue = computeSignalValue; uint64_t graphicsSignalValue = ++timelineValue; updateUniformBuffer(frameIndex); { recordComputeCommandBuffer(); // Submit compute work vk::TimelineSemaphoreSubmitInfo computeTimelineInfo = { .waitSemaphoreValueCount = 1, .pWaitSemaphoreValues = &computeWaitValue, .signalSemaphoreValueCount = 1, .pSignalSemaphoreValues = &computeSignalValue, }; vk::PipelineStageFlags waitStages[] = {vk::PipelineStageFlagBits::eComputeShader}; vk::SubmitInfo computeSubmitInfo = { .pNext = &computeTimelineInfo, .waitSemaphoreCount = 1, .pWaitSemaphores = &*semaphore, .pWaitDstStageMask = waitStages, .commandBufferCount = 1, .pCommandBuffers = &*computeCommandBuffers[frameIndex], .signalSemaphoreCount = 1, .pSignalSemaphores = &*semaphore, }; queue.submit(computeSubmitInfo, nullptr); } { recordCommandBuffer(imageIndex); // Submit graphics work (waits for compute to finish) vk::PipelineStageFlags waitStage = vk::PipelineStageFlagBits::eVertexInput; vk::TimelineSemaphoreSubmitInfo graphicsTimelineInfo = { .waitSemaphoreValueCount = 1, .pWaitSemaphoreValues = &graphicsWaitValue, .signalSemaphoreValueCount = 1, .pSignalSemaphoreValues = &graphicsSignalValue, }; const vk::SubmitInfo graphicsSubmitInfo = { .pNext = &graphicsTimelineInfo, .waitSemaphoreCount = 1, .pWaitSemaphores = &*semaphore, .pWaitDstStageMask = &waitStage, .commandBufferCount = 1, .pCommandBuffers = &*commandBuffers[frameIndex], .signalSemaphoreCount = 1, .pSignalSemaphores = &*semaphore, }; queue.submit(graphicsSubmitInfo, nullptr); // Present the image (wait for graphics to finish) 
vk::SemaphoreWaitInfo waitInfo = { .semaphoreCount = 1, .pSemaphores = &*semaphore, .pValues = &graphicsSignalValue, }; // Wait for graphics to complete before presenting auto result = device.waitSemaphores(waitInfo, UINT64_MAX); if (result != vk::Result::eSuccess) { throw std::runtime_error("failed to wait for semaphore!"); } try { // Presentation vk::PresentInfoKHR presentInfoKHR = { .waitSemaphoreCount = 0, .pWaitSemaphores = nullptr, .swapchainCount = 1, .pSwapchains = &*swapChain, .pImageIndices = &imageIndex, }; result = queue.presentKHR(presentInfoKHR); if (result == vk::Result::eErrorOutOfDateKHR || result == vk::Result::eSuboptimalKHR || framebufferResized) { framebufferResized = false; recreateSwapChain(); } else if (result != vk::Result::eSuccess) { throw std::runtime_error("failed to present swap chain image!"); } } catch (const vk::SystemError &e) { if (e.code().value() == static_cast(vk::Result::eErrorOutOfDateKHR)) { recreateSwapChain(); return; } else { throw; } } } frameIndex = (frameIndex + 1) % MAX_FRAMES_IN_FLIGHT; } void cleanup() { cleanupSwapChain(); glfwDestroyWindow(window); glfwTerminate(); } void createInstance() { constexpr vk::ApplicationInfo appInfo { .pApplicationName = "Hello Triangle", .applicationVersion = VK_MAKE_VERSION(1, 0, 0), .pEngineName = "No Engine", .engineVersion = VK_MAKE_VERSION(1, 0, 0), .apiVersion = vk::ApiVersion14, }; // Get the required layers std::vector requiredLayers; if (enableValidationLayers) { requiredLayers.assign(validationLayers.begin(), validationLayers.end()); } // Check if the required layers are supported by the Vulkan implementation. 
auto layerProperties = context.enumerateInstanceLayerProperties(); if (std::ranges::any_of(requiredLayers, [&layerProperties](auto const& requiredLayer) { return std::ranges::none_of(layerProperties, [requiredLayer](auto const& layerProperty) { return strcmp(layerProperty.layerName, requiredLayer) == 0; }); })) { throw std::runtime_error("One or more required layers are not supported!"); } // Get the required instance extensions from GLFW. uint32_t glfwExtensionCount = 0; auto glfwExtensions = glfwGetRequiredInstanceExtensions(&glfwExtensionCount); // Check if the required GLFW extensions are supported by the Vulkan implementation. auto extensionProperties = context.enumerateInstanceExtensionProperties(); for (uint32_t i = 0; i < glfwExtensionCount; ++i) { if (std::ranges::none_of(extensionProperties, [glfwExtension = glfwExtensions[i]](auto const& extensionProperty) { return strcmp(extensionProperty.extensionName, glfwExtension) == 0; })) { throw std::runtime_error("Required GLFW extension not supported: " + std::string(glfwExtensions[i])); } } vk::InstanceCreateInfo createInfo { .pApplicationInfo = &appInfo, .enabledLayerCount = static_cast(requiredLayers.size()), .ppEnabledLayerNames = requiredLayers.data(), .enabledExtensionCount = glfwExtensionCount, .ppEnabledExtensionNames = glfwExtensions, }; instance = vk::raii::Instance(context, createInfo); } void createSurface() { VkSurfaceKHR _surface; if (glfwCreateWindowSurface(*instance, window, nullptr, &_surface) != 0) { throw std::runtime_error("failed to create window surface!"); } surface = vk::raii::SurfaceKHR(instance, _surface); } void pickPhysicalDevice() { std::vector deviceExtensions = { vk::KHRSwapchainExtensionName, vk::KHRSpirv14ExtensionName, vk::KHRCreateRenderpass2ExtensionName, }; auto devices = instance.enumeratePhysicalDevices(); if (devices.empty()) { throw std::runtime_error("failed to find GPUs with Vulkan support!"); } for (const auto &device : devices) { auto deviceProperties = 
device.getProperties(); auto deviceFeatures = device.getFeatures(); auto queueFamilies = device.getQueueFamilyProperties(); auto extensions = device.enumerateDeviceExtensionProperties(); bool isSuitable = deviceProperties.apiVersion >= VK_API_VERSION_1_3; bool extensionFound = true; const vk::QueueFamilyProperties *qf = nullptr; for (const auto &qfp : queueFamilies) { if ((qfp.queueFlags & vk::QueueFlagBits::eGraphics) != static_cast(0)) { qf = &qfp; break; } } isSuitable = isSuitable && (qf != nullptr); for (const auto &extension : deviceExtensions) { auto extensionIter = std::ranges::find_if(extensions, [extension](auto const & ext) {return strcmp(ext.extensionName, extension) == 0;}); extensionFound = extensionFound && extensionIter != extensions.end(); } isSuitable = isSuitable && extensionFound; if (isSuitable) { physicalDevice = device; return; } throw std::runtime_error("failed to find a suitable GPU"); } } void createLogicalDevice() { std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties(); graphicsComputeQueueIndex = findQueueFamilies(physicalDevice); float queuePriority = 0.5f; vk::DeviceQueueCreateInfo deviceQueueCreateInfo { .queueFamilyIndex = graphicsComputeQueueIndex, .queueCount = 1, .pQueuePriorities = &queuePriority, }; // Create a chain of feature structures vk::StructureChain featureChain = { {.features = { .sampleRateShading = true, .samplerAnisotropy = true, }}, // vk::PhysicalDeviceFeatures2 {.synchronization2 = true, .dynamicRendering = true}, // Enable dynamic rendering and synchronization2 from Vulkan 1.3 {.timelineSemaphore = true}, // Enable timeline semaphores from Vulkan 1.2 {.shaderDrawParameters = true}, // Enable shader draw parameters from Vulkan 1.1 {.extendedDynamicState = true} // Enable extended dynamic state from the extension }; std::vector deviceExtensions = { vk::KHRSwapchainExtensionName, vk::KHRSpirv14ExtensionName, vk::KHRSynchronization2ExtensionName, vk::KHRCreateRenderpass2ExtensionName }; 
vk::DeviceCreateInfo deviceCreateInfo { .pNext = &featureChain.get(), .queueCreateInfoCount = 1, .pQueueCreateInfos = &deviceQueueCreateInfo, .enabledExtensionCount = static_cast(deviceExtensions.size()), .ppEnabledExtensionNames = deviceExtensions.data(), }; device = vk::raii::Device(physicalDevice, deviceCreateInfo); queue = vk::raii::Queue(device, graphicsComputeQueueIndex, 0); } void createSwapChain() { auto surfaceCapabilities = physicalDevice.getSurfaceCapabilitiesKHR(surface); swapChainSurfaceFormat = chooseSwapSurfaceFormat(physicalDevice.getSurfaceFormatsKHR(surface)); swapChainExtent = chooseSwapExtent(surfaceCapabilities); auto minImageCount = std::max(3u, surfaceCapabilities.minImageCount); minImageCount = (surfaceCapabilities.maxImageCount > 0 && minImageCount > surfaceCapabilities.maxImageCount) ? surfaceCapabilities.maxImageCount : minImageCount; vk::SwapchainCreateInfoKHR swapChainCreateInfo { .flags = vk::SwapchainCreateFlagsKHR(), .surface = surface, .minImageCount = minImageCount, .imageFormat = swapChainSurfaceFormat.format, .imageColorSpace = swapChainSurfaceFormat.colorSpace, .imageExtent = swapChainExtent, .imageArrayLayers = 1, .imageUsage = vk::ImageUsageFlagBits::eColorAttachment, .imageSharingMode = vk::SharingMode::eExclusive, .preTransform = surfaceCapabilities.currentTransform, .compositeAlpha = vk::CompositeAlphaFlagBitsKHR::eOpaque, .presentMode = chooseSwapPresentMode(physicalDevice.getSurfacePresentModesKHR(surface)), .clipped = true, .oldSwapchain = nullptr, }; swapChain = vk::raii::SwapchainKHR(device, swapChainCreateInfo); swapChainImages = swapChain.getImages(); swapChainImageFormat = swapChainSurfaceFormat.format; } void createImageViews() { swapChainImageViews.clear(); vk::ImageViewCreateInfo imageViewCreateInfo{ .viewType = vk::ImageViewType::e2D, .format = swapChainImageFormat, .subresourceRange = { .aspectMask = vk::ImageAspectFlagBits::eColor, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, .layerCount = 1, } }; 
for (auto image : swapChainImages) { imageViewCreateInfo.image = image; swapChainImageViews.emplace_back(vk::raii::ImageView(device, imageViewCreateInfo)); } } void createComputeDescriptorSetLayout() { std::array bindings = { vk::DescriptorSetLayoutBinding{0, vk::DescriptorType::eUniformBuffer, 1, vk::ShaderStageFlagBits::eCompute, nullptr}, vk::DescriptorSetLayoutBinding{1, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute, nullptr}, vk::DescriptorSetLayoutBinding{2, vk::DescriptorType::eStorageBuffer, 1, vk::ShaderStageFlagBits::eCompute, nullptr}, }; vk::DescriptorSetLayoutCreateInfo layoutInfo{ .bindingCount = bindings.size(), .pBindings = bindings.data(), }; computeDescriptorSetLayout = vk::raii::DescriptorSetLayout(device, layoutInfo); } void createGraphicsPipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(readFile(SHADER_FILE)); vk::PipelineShaderStageCreateInfo vertShaderStageInfo = { .stage = vk::ShaderStageFlagBits::eVertex, .module = shaderModule, .pName = "vertMain", }; vk::PipelineShaderStageCreateInfo fragShaderStageInfo = { .stage = vk::ShaderStageFlagBits::eFragment, .module = shaderModule, .pName = "fragMain", }; vk::PipelineShaderStageCreateInfo shaderStages[] = {vertShaderStageInfo, fragShaderStageInfo}; // Particles input auto bindingDescription = Particle::getBindingDescription(); auto attributeDescriptions = Particle::getAttributeDescriptions(); vk::PipelineVertexInputStateCreateInfo vertexInputInfo{ .vertexBindingDescriptionCount = 1, .pVertexBindingDescriptions = &bindingDescription, .vertexAttributeDescriptionCount = attributeDescriptions.size(), .pVertexAttributeDescriptions = attributeDescriptions.data(), }; // Input assembly vk::PipelineInputAssemblyStateCreateInfo inputAssembly = { .topology = vk::PrimitiveTopology::ePointList, .primitiveRestartEnable = vk::False, }; // Dynamic state std::vector dynamicStates = { vk::DynamicState::eViewport, vk::DynamicState::eScissor, }; 
vk::PipelineDynamicStateCreateInfo dynamicState = { .dynamicStateCount = static_cast(dynamicStates.size()), .pDynamicStates = dynamicStates.data(), }; // No need to specify viewport and scissor because they will be specified dynamically vk::PipelineViewportStateCreateInfo viewportState = { .viewportCount = 1, .scissorCount = 1, }; // Rasterisation vk::PipelineRasterizationStateCreateInfo rasterizer = { .depthClampEnable = vk::False, .rasterizerDiscardEnable = vk::False, .polygonMode = vk::PolygonMode::eFill, .cullMode = vk::CullModeFlagBits::eBack, .frontFace = vk::FrontFace::eCounterClockwise, .depthBiasEnable = vk::False, .depthBiasSlopeFactor = 1.0f, .lineWidth = 1.0f }; // Multisampling vk::PipelineMultisampleStateCreateInfo multisampling = { .rasterizationSamples = vk::SampleCountFlagBits::e1, .sampleShadingEnable = vk::False, }; // Color blending vk::PipelineColorBlendAttachmentState colorBlendAttachment = { .blendEnable = vk::True, .srcColorBlendFactor = vk::BlendFactor::eSrcAlpha, .dstColorBlendFactor = vk::BlendFactor::eOneMinusSrcAlpha, .colorBlendOp = vk::BlendOp::eAdd, .srcAlphaBlendFactor = vk::BlendFactor::eOneMinusSrcAlpha, .dstAlphaBlendFactor = vk::BlendFactor::eZero, .alphaBlendOp = vk::BlendOp::eAdd, .colorWriteMask = vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA, }; vk::PipelineColorBlendStateCreateInfo colorBlending = { .logicOpEnable = vk::False, .logicOp = vk::LogicOp::eCopy, .attachmentCount = 1, .pAttachments = &colorBlendAttachment, }; // Pipeline layout vk::PipelineLayoutCreateInfo pipelineLayoutInfo{}; graphicsPipelineLayout = vk::raii::PipelineLayout(device, pipelineLayoutInfo); // Dynamic rendering pipeline vk::PipelineRenderingCreateInfo pipelineRenderingCreateInfo = { .colorAttachmentCount = 1, .pColorAttachmentFormats = &swapChainImageFormat, }; vk::GraphicsPipelineCreateInfo pipelineInfo = { .pNext = &pipelineRenderingCreateInfo, .stageCount = 2, 
.pStages = shaderStages, .pVertexInputState = &vertexInputInfo, .pInputAssemblyState = &inputAssembly, .pViewportState = &viewportState, .pRasterizationState = &rasterizer, .pMultisampleState = &multisampling, .pColorBlendState = &colorBlending, .pDynamicState = &dynamicState, .layout = graphicsPipelineLayout, .renderPass = nullptr, }; // Create pipeline graphicsPipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); } void createComputePipeline() { vk::raii::ShaderModule shaderModule = createShaderModule(readFile(SHADER_FILE)); vk::PipelineShaderStageCreateInfo computeShaderStageInfo = { .stage = vk::ShaderStageFlagBits::eCompute, .module = shaderModule, .pName = "compMain", }; vk::PipelineLayoutCreateInfo layoutInfo = { .setLayoutCount = 1, .pSetLayouts = &*computeDescriptorSetLayout, }; computePipelineLayout = vk::raii::PipelineLayout(device, layoutInfo); vk::ComputePipelineCreateInfo pipelineInfo = { .stage = computeShaderStageInfo, .layout = *computePipelineLayout, }; computePipeline = vk::raii::Pipeline(device, nullptr, pipelineInfo); } void createCommandPool() { vk::CommandPoolCreateInfo poolInfo = { .flags = vk::CommandPoolCreateFlagBits::eResetCommandBuffer, .queueFamilyIndex = graphicsComputeQueueIndex, }; commandPool = vk::raii:: CommandPool(device, poolInfo); } void createShaderStorageBuffers() { std::default_random_engine rndEngine(static_cast(time(nullptr))); std::uniform_real_distribution rndDist(0.0f, 1.0f); // Initialise particle positions on a circle std::vector particles(PARTICLE_COUNT); for (auto &particle : particles) { float r = 0.25f * sqrtf(rndDist(rndEngine)); float theta = rndDist(rndEngine) * 2.0f * 3.14159265358979323846f; float x = r * cosf(theta) * HEIGHT / WIDTH; float y = r * sinf(theta); particle.position = glm::vec2(x, y); particle.velocity = normalize(glm::vec2(x, y)) * 0.00025f; particle.color = glm::vec4(rndDist(rndEngine), rndDist(rndEngine), rndDist(rndEngine), 1.0f); } vk::DeviceSize bufferSize = sizeof(Particle) * 
PARTICLE_COUNT; // Create a staging buffer used to upload data to the gpu vk::raii::Buffer stagingBuffer({}); vk::raii::DeviceMemory stagingBufferMemory({}); createBuffer(bufferSize, vk::BufferUsageFlagBits::eTransferSrc, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, stagingBuffer, stagingBufferMemory); void *dataStaging = stagingBufferMemory.mapMemory(0, bufferSize); memcpy(dataStaging, particles.data(), static_cast(bufferSize)); stagingBufferMemory.unmapMemory(); shaderStorageBuffers.clear(); shaderStorageBuffersMemory.clear(); for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { vk::raii::Buffer shaderStorageBufferTemp({}); vk::raii::DeviceMemory shaderStorageBufferTempMemory({}); createBuffer(bufferSize, vk::BufferUsageFlagBits::eStorageBuffer | vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eTransferDst, vk::MemoryPropertyFlagBits::eDeviceLocal, shaderStorageBufferTemp, shaderStorageBufferTempMemory); copyBuffer(stagingBuffer, shaderStorageBufferTemp, bufferSize); shaderStorageBuffers.emplace_back(std::move(shaderStorageBufferTemp)); shaderStorageBuffersMemory.emplace_back(std::move(shaderStorageBufferTempMemory)); } } void createUniformBuffers() { uniformBuffers.clear(); uniformBuffersMemory.clear(); unifromBuffersMapped.clear(); for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { vk::DeviceSize bufferSize = sizeof(UniformBufferObject); vk::raii::Buffer buffer({}); vk::raii::DeviceMemory bufferMem({}); createBuffer(bufferSize, vk::BufferUsageFlagBits::eUniformBuffer, vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent, buffer, bufferMem); uniformBuffers.emplace_back(std::move(buffer)); uniformBuffersMemory.emplace_back(std::move(bufferMem)); unifromBuffersMapped.emplace_back(uniformBuffersMemory[i].mapMemory(0, bufferSize)); } } void createDescriptorPool() { std::array poolSizes = { vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, MAX_FRAMES_IN_FLIGHT}, 
vk::DescriptorPoolSize{vk::DescriptorType::eStorageBuffer, MAX_FRAMES_IN_FLIGHT * 2}, }; vk::DescriptorPoolCreateInfo poolInfo = { .flags = vk::DescriptorPoolCreateFlagBits::eFreeDescriptorSet, .maxSets = MAX_FRAMES_IN_FLIGHT, .poolSizeCount = poolSizes.size(), .pPoolSizes = poolSizes.data(), }; descriptorPool = vk::raii::DescriptorPool(device, poolInfo); } void createComputeDescriptorSets() { std::vector layouts(MAX_FRAMES_IN_FLIGHT, computeDescriptorSetLayout); vk::DescriptorSetAllocateInfo allocInfo { .descriptorPool = *descriptorPool, .descriptorSetCount = MAX_FRAMES_IN_FLIGHT, .pSetLayouts = layouts.data(), }; computeDescriptorSets.clear(); computeDescriptorSets = device.allocateDescriptorSets(allocInfo); for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { vk::DescriptorBufferInfo bufferInfo { .buffer = uniformBuffers[i], .offset = 0, .range = sizeof(UniformBufferObject), }; vk::DescriptorBufferInfo storageBufferInfoLastFrame { .buffer = shaderStorageBuffers[(i - 1) % MAX_FRAMES_IN_FLIGHT], .offset = 0, .range = sizeof(Particle) * PARTICLE_COUNT, }; vk::DescriptorBufferInfo storageBufferInfoCurrentFrame { .buffer = shaderStorageBuffers[i], .offset = 0, .range = sizeof(Particle) * PARTICLE_COUNT, }; std::array descriptorWrites = { vk::WriteDescriptorSet{ .dstSet = computeDescriptorSets[i], .dstBinding = 0, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eUniformBuffer, .pBufferInfo = &bufferInfo, }, vk::WriteDescriptorSet{ .dstSet = computeDescriptorSets[i], .dstBinding = 1, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &storageBufferInfoLastFrame, }, vk::WriteDescriptorSet{ .dstSet = computeDescriptorSets[i], .dstBinding = 2, .dstArrayElement = 0, .descriptorCount = 1, .descriptorType = vk::DescriptorType::eStorageBuffer, .pBufferInfo = &storageBufferInfoCurrentFrame, } }; device.updateDescriptorSets(descriptorWrites, {}); } } void createCommandBuffers() { 
commandBuffers.clear(); vk::CommandBufferAllocateInfo allocInfo = { .commandPool = commandPool, .level = vk::CommandBufferLevel::ePrimary, .commandBufferCount = MAX_FRAMES_IN_FLIGHT, }; commandBuffers = vk::raii::CommandBuffers(device, allocInfo); } void createComputeCommandBuffers() { computeCommandBuffers.clear(); vk::CommandBufferAllocateInfo allocInfo = { .commandPool = commandPool, .level = vk::CommandBufferLevel::ePrimary, .commandBufferCount = MAX_FRAMES_IN_FLIGHT, }; computeCommandBuffers = vk::raii::CommandBuffers(device, allocInfo); } void createSyncObjects() { drawFences.clear(); vk::SemaphoreTypeCreateInfo semaphoreType { .semaphoreType = vk::SemaphoreType::eTimeline, .initialValue = 0, }; semaphore = vk::raii::Semaphore(device, {.pNext = &semaphoreType}); timelineValue = 0; for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; ++i) { vk::FenceCreateInfo fenceInfo {}; drawFences.emplace_back(device, fenceInfo); } } void copyBufferToImage(const vk::raii::Buffer& buffer, vk::raii::Image& image, uint32_t width, uint32_t height) { vk::raii::CommandBuffer commandBuffer = beginSingleTimeCommands(); vk::BufferImageCopy region { .bufferOffset = 0, .bufferRowLength = 0, .bufferImageHeight = 0, .imageSubresource = { vk::ImageAspectFlagBits::eColor, 0, 0, 1 }, .imageOffset = { 0, 0, 0 }, .imageExtent = { width, height, 1 } }; commandBuffer.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, {region}); endSingleTimeCommands(commandBuffer); } void updateUniformBuffer(uint32_t currentImage) { UniformBufferObject ubo { .deltaTime = static_cast(lastFrameTime) * 2.0f, }; memcpy(unifromBuffersMapped[currentImage], &ubo, sizeof(ubo)); } void recordCommandBuffer(uint32_t imageIndex) { auto &commandBuffer = commandBuffers[frameIndex]; // Begin recording the command buffer commandBuffer.begin({}); // Before starting rendering, transition the swapchain image to COLOR_ATTACHMENT_OPTIMAL transitionRenderingImageLayout( swapChainImages[imageIndex], 
vk::ImageLayout::eUndefined, vk::ImageLayout::eColorAttachmentOptimal, {}, // srcAccessMask (no need to wait for previous operations) vk::AccessFlagBits2::eColorAttachmentWrite, // dstAccessMask vk::PipelineStageFlagBits2::eColorAttachmentOutput, // srcStage vk::PipelineStageFlagBits2::eColorAttachmentOutput, // dstStage vk::ImageAspectFlagBits::eColor // aspectFlags ); vk::ClearValue clearColor = vk::ClearColorValue(0.0f, 0.0f, 0.0f, 1.0f); vk::RenderingAttachmentInfo colorAttachmentInfo = { .imageView = swapChainImageViews[imageIndex], .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, .loadOp = vk::AttachmentLoadOp::eClear, .storeOp = vk::AttachmentStoreOp::eStore, .clearValue = clearColor, }; vk::RenderingInfo renderingInfo = { .renderArea = { .offset = { 0, 0 }, .extent = swapChainExtent }, .layerCount = 1, .colorAttachmentCount = 1, .pColorAttachments = &colorAttachmentInfo, }; commandBuffer.beginRendering(renderingInfo); commandBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, graphicsPipeline); commandBuffer.bindVertexBuffers(0, {shaderStorageBuffers[frameIndex]}, {0}); // Viewport and scissor are dynamic so we need to set them vk::Viewport viewport = { .x = 0.0f, .y = 0.0f, .width = static_cast(swapChainExtent.width), .height = static_cast(swapChainExtent.height), .minDepth = 0.0f, .maxDepth = 1.0f, }; commandBuffer.setViewport(0, viewport); commandBuffer.setScissor(0, vk::Rect2D(vk::Offset2D(0, 0), swapChainExtent)); // Issue the draw command commandBuffer.draw(PARTICLE_COUNT, 1, 0, 0); commandBuffer.endRendering(); // After rendering, transition the swapchain image to PRESENT_SRC transitionRenderingImageLayout( swapChainImages[imageIndex], vk::ImageLayout::eColorAttachmentOptimal, vk::ImageLayout::ePresentSrcKHR, vk::AccessFlagBits2::eColorAttachmentWrite, // srcAccessMask {}, // dstAccessMask vk::PipelineStageFlagBits2::eColorAttachmentOutput, // srcStage vk::PipelineStageFlagBits2::eBottomOfPipe, // dstStage vk::ImageAspectFlagBits::eColor // 
aspectFlags ); // Finish recording the command buffer commandBuffer.end(); } void recordComputeCommandBuffer() { auto &commandBuffer = computeCommandBuffers[frameIndex]; commandBuffer.begin({}); commandBuffer.bindPipeline(vk::PipelineBindPoint::eCompute, computePipeline); commandBuffer.bindDescriptorSets(vk::PipelineBindPoint::eCompute, computePipelineLayout, 0, {computeDescriptorSets[frameIndex]}, {}); commandBuffer.dispatch(PARTICLE_COUNT / 256, 1, 1); commandBuffer.end(); } void createBuffer( vk::DeviceSize size, vk::BufferUsageFlags usage, vk::MemoryPropertyFlags properties, vk::raii::Buffer &buffer, vk::raii::DeviceMemory &bufferMemory ) { vk::BufferCreateInfo bufferInfo = { .size = size, .usage = usage, .sharingMode = vk::SharingMode::eExclusive, }; buffer = vk::raii::Buffer(device, bufferInfo); vk::MemoryRequirements memRequirements = buffer.getMemoryRequirements(); vk::MemoryAllocateInfo memAllocateInfo = { .allocationSize = memRequirements.size, .memoryTypeIndex = findMemoryType(memRequirements.memoryTypeBits, properties) }; bufferMemory = vk::raii::DeviceMemory(device, memAllocateInfo); buffer.bindMemory(bufferMemory, 0); } void copyBuffer(vk::raii::Buffer &srcBuffer, vk::raii::Buffer &dstBuffer, vk::DeviceSize size) { vk::raii::CommandBuffer commandCopyBuffer = beginSingleTimeCommands(); commandCopyBuffer.copyBuffer(*srcBuffer, *dstBuffer, vk::BufferCopy(0, 0, size)); endSingleTimeCommands(commandCopyBuffer); } vk::raii::CommandBuffer beginSingleTimeCommands() { vk::CommandBufferAllocateInfo allocInfo { .commandPool = commandPool, .level = vk::CommandBufferLevel::ePrimary, .commandBufferCount = 1 }; vk::raii::CommandBuffer commandBuffer = std::move(device.allocateCommandBuffers(allocInfo).front()); vk::CommandBufferBeginInfo beginInfo{ .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit }; commandBuffer.begin(beginInfo); return commandBuffer; } bool hasStencilComponent(vk::Format format) { return format == vk::Format::eD32SfloatS8Uint || format == 
vk::Format::eD24UnormS8Uint; }

    // Returns the first entry of `candidates` whose format properties, as
    // reported by physicalDevice.getFormatProperties(), contain every bit of
    // `features` in the feature set selected by `tiling` (linearTilingFeatures
    // for eLinear, optimalTilingFeatures for eOptimal).
    // Throws std::runtime_error when no candidate qualifies.
    // NOTE(review): `std::vector` appears without an element type here and in
    // several other places below -- confirm the template arguments.
    vk::Format findSupportedFormat(const std::vector& candidates, vk::ImageTiling tiling, vk::FormatFeatureFlags features) {
        for (const auto format : candidates) {
            vk::FormatProperties props = physicalDevice.getFormatProperties(format);
            if (tiling == vk::ImageTiling::eLinear && (props.linearTilingFeatures & features) == features) {
                return format;
            }
            if (tiling == vk::ImageTiling::eOptimal && (props.optimalTilingFeatures & features) == features) {
                return format;
            }
        }
        throw std::runtime_error("failed to find supported format!");
    }

    // Ends recording on `commandBuffer`, submits it to `queue` without a fence
    // (nullptr) and then waits for the queue to become idle before returning.
    void endSingleTimeCommands(vk::raii::CommandBuffer& commandBuffer) {
        commandBuffer.end();
        vk::SubmitInfo submitInfo{ .commandBufferCount = 1, .pCommandBuffers = &*commandBuffer };
        queue.submit(submitInfo, nullptr);
        queue.waitIdle();
    }

    // Returns the index of the first memory type of `physicalDevice` whose bit
    // is set in `typeFilter` and whose propertyFlags contain all of `properties`.
    // Throws std::runtime_error when there is no such memory type.
    uint32_t findMemoryType(uint32_t typeFilter, vk::MemoryPropertyFlags properties) {
        vk::PhysicalDeviceMemoryProperties memProperties = physicalDevice.getMemoryProperties();
        for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) {
            // Bit i of typeFilter marks memory type i as acceptable.
            // NOTE(review): `1 << i` shifts a signed int; `1u << i` would avoid
            // touching the sign bit for i == 31 -- confirm whether that can occur.
            if ((typeFilter & (1 << i) && (memProperties.memoryTypes[i].propertyFlags & properties) == properties)) {
                return i;
            }
        }
        throw std::runtime_error("failed to find suitable memory type!");
    }

    // Records and submits a one-off image layout transition for `image`,
    // covering mip levels [0, mipLevels) of array layer 0 with the color aspect.
    // The command buffer comes from beginSingleTimeCommands() and is submitted
    // through endSingleTimeCommands().
    //
    // Only two transitions are handled:
    //   eUndefined          -> eTransferDstOptimal
    //   eTransferDstOptimal -> eShaderReadOnlyOptimal
    // Any other pair throws std::invalid_argument.
    void transitionImageLayout(const vk::raii::Image &image, vk::ImageLayout oldLayout, vk::ImageLayout newLayout, uint32_t mipLevels) {
        vk::raii::CommandBuffer commandBuffer = beginSingleTimeCommands();
        vk::ImageMemoryBarrier barrier{
            .oldLayout = oldLayout,
            .newLayout = newLayout,
            .image = image,
            .subresourceRange = {
                .aspectMask = vk::ImageAspectFlagBits::eColor,
                .baseMipLevel = 0,
                .levelCount = mipLevels,
                .baseArrayLayer = 0,
                .layerCount = 1,
            }
        };
        // Access masks and pipeline stages depend on which transition was requested.
        vk::PipelineStageFlags sourceStage;
        vk::PipelineStageFlags destinationStage;
        if (oldLayout == vk::ImageLayout::eUndefined && newLayout == vk::ImageLayout::eTransferDstOptimal) {
            // No source access; the destination is a transfer write.
            barrier.srcAccessMask = {};
            barrier.dstAccessMask = vk::AccessFlagBits::eTransferWrite;
            sourceStage = vk::PipelineStageFlagBits::eTopOfPipe;
            destinationStage = vk::PipelineStageFlagBits::eTransfer;
        } else if (oldLayout == vk::ImageLayout::eTransferDstOptimal && newLayout == vk::ImageLayout::eShaderReadOnlyOptimal) {
            // Transfer write first, then shader reads in the fragment shader stage.
            barrier.srcAccessMask = vk::AccessFlagBits::eTransferWrite;
            barrier.dstAccessMask = vk::AccessFlagBits::eShaderRead;
            sourceStage = vk::PipelineStageFlagBits::eTransfer;
            destinationStage = vk::PipelineStageFlagBits::eFragmentShader;
        } else {
            throw std::invalid_argument("unsupported layout transition!");
        }
        commandBuffer.pipelineBarrier(sourceStage, destinationStage, {}, {}, nullptr, barrier);
        endSingleTimeCommands(commandBuffer);
    }

    // Records one vk::ImageMemoryBarrier2 for `image` into
    // commandBuffers[frameIndex] via pipelineBarrier2(). Nothing is submitted here.
    // The barrier uses the caller-supplied layouts, access masks, stage masks
    // and aspect flags, covers mip level 0 / array layer 0 only
    // (levelCount = 1, layerCount = 1), and sets both queue family indices to
    // VK_QUEUE_FAMILY_IGNORED.
    void transitionRenderingImageLayout(
        vk::Image image,
        vk::ImageLayout oldLayout,
        vk::ImageLayout newLayout,
        vk::AccessFlags2 srcAccessMask,
        vk::AccessFlags2 dstAccessMask,
        vk::PipelineStageFlags2 srcStageMask,
        vk::PipelineStageFlags2 dstStageMask,
        vk::ImageAspectFlags aspectFlags
        ) {
        vk::ImageMemoryBarrier2 barrier = {
            .srcStageMask = srcStageMask,
            .srcAccessMask = srcAccessMask,
            .dstStageMask = dstStageMask,
            .dstAccessMask = dstAccessMask,
            .oldLayout = oldLayout,
            .newLayout = newLayout,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = image,
            .subresourceRange = {
                .aspectMask = aspectFlags,
                .baseMipLevel = 0,
                .levelCount = 1,
                .baseArrayLayer = 0,
                .layerCount = 1
            }
        };
        vk::DependencyInfo dependencyInfo = {
            .dependencyFlags = {},
            .imageMemoryBarrierCount = 1,
            .pImageMemoryBarriers = &barrier
        };
        commandBuffers[frameIndex].pipelineBarrier2(dependencyInfo);
    }

    // Clears swapChainImageViews and then resets swapChain to nullptr.
    void cleanupSwapChain() {
        swapChainImageViews.clear();
        swapChain = nullptr;
    }

    // Rebuilds the swap chain and its image views.
    // While the framebuffer reports a zero width or height (presumably while
    // the window is minimized -- TODO confirm) the size is re-queried and
    // glfwWaitEvents() is called. After that the device is idled, the old swap
    // chain objects are released and createSwapChain() / createImageViews() run.
    void recreateSwapChain() {
        int width = 0, height = 0;
        glfwGetFramebufferSize(window, &width, &height);
        while (width == 0 || height == 0) {
            glfwGetFramebufferSize(window, &width, &height);
            glfwWaitEvents();
        }
        device.waitIdle();
        cleanupSwapChain();
        createSwapChain();
        createImageViews();
    }

    // Builds a vk::raii::ShaderModule on `device` from the bytes in `code`.
    // codeSize is the byte count (code.size() * sizeof(char)) and pCode points
    // at the start of the same buffer.
    // NOTE(review): `reinterpret_cast(code.data())` carries no target type as
    // written -- confirm the intended pointer type for pCode.
    [[nodiscard]] vk::raii::ShaderModule createShaderModule(const std::vector &code) const {
        vk::ShaderModuleCreateInfo createInfo {
            .codeSize = code.size() * sizeof(char),
            .pCode = reinterpret_cast(code.data()),
        };
        return vk::raii::ShaderModule{device, createInfo};
    }

    // Returns the first entry of `availableFormats` that is eB8G8R8A8Srgb with
    // the eSrgbNonlinear color space; otherwise returns availableFormats[0].
    // NOTE(review): the fallback indexes element 0 without checking for an
    // empty list -- confirm callers never pass one.
    vk::SurfaceFormatKHR chooseSwapSurfaceFormat(const std::vector& availableFormats) {
        for (const auto& availableFormat : availableFormats) {
            if (availableFormat.format == vk::Format::eB8G8R8A8Srgb && availableFormat.colorSpace == vk::ColorSpaceKHR::eSrgbNonlinear) {
                return availableFormat;
            }
        }
        return availableFormats[0];
    }

    // Returns eMailbox when it is listed in `availablePresentModes`,
    // otherwise eFifo.
    vk::PresentModeKHR chooseSwapPresentMode(const std::vector& availablePresentModes) {
        for (const auto& availablePresentMode : availablePresentModes) {
            if (availablePresentMode == vk::PresentModeKHR::eMailbox) {
                return availablePresentMode;
            }
        }
        return vk::PresentModeKHR::eFifo;
    }

    // Picks the swap chain extent.
    // capabilities.currentExtent is returned unchanged unless its width equals
    // std::numeric_limits::max(); in that case the GLFW framebuffer size is
    // queried and each dimension is clamped into
    // [minImageExtent, maxImageExtent].
    // NOTE(review): `std::numeric_limits` and `std::clamp` appear without
    // explicit template arguments while `width`/`height` are int -- confirm
    // the intended types.
    vk::Extent2D chooseSwapExtent(const vk::SurfaceCapabilitiesKHR& capabilities) {
        if (capabilities.currentExtent.width != std::numeric_limits::max()) {
            return capabilities.currentExtent;
        }
        int width, height;
        glfwGetFramebufferSize(window, &width, &height);
        return {
            std::clamp(width, capabilities.minImageExtent.width, capabilities.maxImageExtent.width),
            std::clamp(height, capabilities.minImageExtent.height, capabilities.maxImageExtent.height),
        };
    }

    // Returns the index of the first queue family of the given physical device
    // that has both the eGraphics and eCompute flags and can present to
    // `surface`. Throws std::runtime_error when no family satisfies all three.
    // The `physicalDevice` parameter is taken by value and shadows the member
    // of the same name inside this function.
    uint32_t findQueueFamilies(vk::raii::PhysicalDevice physicalDevice) {
        // query the properties of every queue family of this physical device
        std::vector queueFamilyProperties = physicalDevice.getQueueFamilyProperties();
        // get the first index into queueFamilyProperties which supports graphics, compute and present
        // (~0, i.e. all bits set, means "not found yet")
        uint32_t queueIndex = ~0;
        for (uint32_t qfpIndex = 0; qfpIndex < queueFamilyProperties.size(); ++qfpIndex) {
            if ((queueFamilyProperties[qfpIndex].queueFlags & vk::QueueFlagBits::eGraphics) &&
                (queueFamilyProperties[qfpIndex].queueFlags & vk::QueueFlagBits::eCompute) &&
                physicalDevice.getSurfaceSupportKHR(qfpIndex, *surface)) {
                queueIndex = qfpIndex;
                break;
            }
        }
        if (queueIndex == ~0) {
            // NOTE(review): the message names only graphics and present although
            // the search above also requires compute. The literal is also split
            // across two physical lines in the file as it stands (looks like a
            // line-wrap artifact); it is kept byte-for-byte -- confirm and rejoin.
            throw std::runtime_error("Could not find a queue for graphics and present 
-> terminating");
        }
        return queueIndex;
    }

    // Returns the largest sample count (checked from e64 down to e2) that is
    // set in both framebufferColorSampleCounts and framebufferDepthSampleCounts
    // of the physical device limits; e1 when none of them is set.
    vk::SampleCountFlagBits getMaxUsableSampleCount() {
        vk::PhysicalDeviceProperties props = physicalDevice.getProperties();
        // Only counts supported by both color and depth attachments remain.
        vk::SampleCountFlags counts = props.limits.framebufferColorSampleCounts & props.limits.framebufferDepthSampleCounts;
        if (counts & vk::SampleCountFlagBits::e64) { return vk::SampleCountFlagBits::e64; }
        if (counts & vk::SampleCountFlagBits::e32) { return vk::SampleCountFlagBits::e32; }
        if (counts & vk::SampleCountFlagBits::e16) { return vk::SampleCountFlagBits::e16; }
        if (counts & vk::SampleCountFlagBits::e8) { return vk::SampleCountFlagBits::e8; }
        if (counts & vk::SampleCountFlagBits::e4) { return vk::SampleCountFlagBits::e4; }
        if (counts & vk::SampleCountFlagBits::e2) { return vk::SampleCountFlagBits::e2; }
        return vk::SampleCountFlagBits::e1;
    }

    // ---- per-frame bookkeeping ----
    // Index used to select the entry of commandBuffers that
    // transitionRenderingImageLayout() records into.
    uint32_t frameIndex = 0;
    double lastFrameTime = 0.0;
    double lastTime = 0.0;
    bool framebufferResized = false;

    // ---- window / instance / device ----
    // No in-class initializer; presumably assigned during window setup -- TODO confirm.
    GLFWwindow *window;
    vk::raii::Context context;
    vk::raii::Instance instance = nullptr;
    vk::raii::PhysicalDevice physicalDevice = nullptr;
    // ~0 (all bits set) until a queue family has been chosen -- presumably from
    // findQueueFamilies(); verify against the caller.
    uint32_t graphicsComputeQueueIndex = ~0 ;
    vk::raii::Device device = nullptr;
    vk::raii::Queue queue = nullptr;
    vk::raii::SurfaceKHR surface = nullptr;

    // ---- swap chain ----
    vk::raii::SwapchainKHR swapChain = nullptr;
    vk::SurfaceFormatKHR swapChainSurfaceFormat;
    vk::Extent2D swapChainExtent;
    vk::Format swapChainImageFormat = vk::Format::eUndefined;
    // NOTE(review): the std::vector members below carry no element type as
    // written -- confirm the template arguments.
    std::vector swapChainImages;
    std::vector swapChainImageViews;

    // ---- descriptors and pipelines ----
    vk::raii::DescriptorSetLayout computeDescriptorSetLayout = nullptr;
    vk::raii::DescriptorPool descriptorPool = nullptr;
    std::vector computeDescriptorSets;
    vk::raii::PipelineLayout graphicsPipelineLayout = nullptr;
    vk::raii::Pipeline graphicsPipeline = nullptr;
    vk::raii::PipelineLayout computePipelineLayout = nullptr;
    vk::raii::Pipeline computePipeline = nullptr;

    // ---- commands and synchronization ----
    vk::raii::CommandPool commandPool = nullptr;
    vk::raii::Semaphore semaphore = nullptr;
    uint64_t timelineValue = 0;

    // ---- buffers ----
    std::vector shaderStorageBuffers;
    std::vector shaderStorageBuffersMemory;
    std::vector uniformBuffers;
    std::vector uniformBuffersMemory;
    // The name is spelled "unifrom" (sic); renaming it would also require
    // updating every use elsewhere in the class.
    std::vector unifromBuffersMapped;
    std::vector commandBuffers;
    std::vector computeCommandBuffers;
    std::vector drawFences;
};

// Program entry point: constructs the application and calls run().
// Any std::exception escaping run() has its what() text printed and the
// process returns EXIT_FAILURE; otherwise EXIT_SUCCESS.
// NOTE(review): the error text goes to std::cout rather than std::cerr --
// confirm this is intended.
int main() {
    ComputeShaderApplication app;
    try {
        app.run();
    } catch (const std::exception &e) {
        std::cout << e.what() << std::endl;
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}