[Mlir-commits] [mlir] 0e76c0a - [mlir][vulkan-runner] Make vulkan runner use GPU device memory
Thomas Raoux
llvmlistbot at llvm.org
Fri Jun 26 08:03:27 PDT 2020
Author: Thomas Raoux
Date: 2020-06-26T08:03:06-07:00
New Revision: 0e76c0a9ad96517edbf989162b22d0a12d0ef41a
URL: https://github.com/llvm/llvm-project/commit/0e76c0a9ad96517edbf989162b22d0a12d0ef41a
DIFF: https://github.com/llvm/llvm-project/commit/0e76c0a9ad96517edbf989162b22d0a12d0ef41a.diff
LOG: [mlir][vulkan-runner] Make vulkan runner use GPU device memory
To be able to have more meaningful performance out of workloadsi going through
the vulkan-runner we need to use buffers from GPU device memory as access to
system memory is significantly slower for GPU with dedicated memory. This adds
code to do a copy through staging buffer as GPU memory cannot always be mapped
on the host.
Differential Revision: https://reviews.llvm.org/D82504
Added:
Modified:
mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp
mlir/tools/mlir-vulkan-runner/VulkanRuntime.h
Removed:
################################################################################
diff --git a/mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp b/mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp
index 2c42c19badef..70812d2168fc 100644
--- a/mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp
+++ b/mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp
@@ -148,7 +148,9 @@ LogicalResult VulkanRuntime::destroy() {
// For each descriptor binding.
for (auto &memoryBuffer : deviceMemoryBuffers) {
vkFreeMemory(device, memoryBuffer.deviceMemory, nullptr);
- vkDestroyBuffer(device, memoryBuffer.buffer, nullptr);
+ vkFreeMemory(device, memoryBuffer.hostMemory, nullptr);
+ vkDestroyBuffer(device, memoryBuffer.hostBuffer, nullptr);
+ vkDestroyBuffer(device, memoryBuffer.deviceBuffer, nullptr);
}
}
@@ -181,6 +183,9 @@ LogicalResult VulkanRuntime::run() {
// Get working queue.
vkGetDeviceQueue(device, queueFamilyIndex, 0, &queue);
+ if (failed(copyResource(/*deviceToHost=*/false)))
+ return failure();
+
auto submitStart = std::chrono::high_resolution_clock::now();
// Submit command buffer into the queue.
if (failed(submitCommandBuffersToQueue()))
@@ -304,13 +309,28 @@ LogicalResult VulkanRuntime::createDevice() {
properties.memoryTypes[i].propertyFlags) &&
(memorySize <=
properties.memoryHeaps[properties.memoryTypes[i].heapIndex].size)) {
- memoryTypeIndex = i;
+ hostMemoryTypeIndex = i;
+ break;
+ }
+ }
+
+ // Find memory type memory type with VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT to be
+ // used on the device. This will allow better performance access for GPU with
+ // on device memory.
+ for (uint32_t i = 0, e = properties.memoryTypeCount; i < e; ++i) {
+ if ((VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT &
+ properties.memoryTypes[i].propertyFlags) &&
+ (memorySize <=
+ properties.memoryHeaps[properties.memoryTypes[i].heapIndex].size)) {
+ deviceMemoryTypeIndex = i;
break;
}
}
- RETURN_ON_VULKAN_ERROR(memoryTypeIndex == VK_MAX_MEMORY_TYPES ? VK_INCOMPLETE
- : VK_SUCCESS,
+ RETURN_ON_VULKAN_ERROR((hostMemoryTypeIndex == VK_MAX_MEMORY_TYPES ||
+ deviceMemoryTypeIndex == VK_MAX_MEMORY_TYPES)
+ ? VK_INCOMPLETE
+ : VK_SUCCESS,
"invalid memoryTypeIndex");
return success();
}
@@ -401,27 +421,31 @@ LogicalResult VulkanRuntime::createMemoryBuffers() {
// Set descriptor type for the specific device memory buffer.
memoryBuffer.descriptorType = descriptorType;
const auto bufferSize = resourceDataBindingPair.second.size;
-
+ memoryBuffer.bufferSize = bufferSize;
// Specify memory allocation info.
VkMemoryAllocateInfo memoryAllocateInfo = {};
memoryAllocateInfo.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memoryAllocateInfo.pNext = nullptr;
memoryAllocateInfo.allocationSize = bufferSize;
- memoryAllocateInfo.memoryTypeIndex = memoryTypeIndex;
+ memoryAllocateInfo.memoryTypeIndex = hostMemoryTypeIndex;
// Allocate device memory.
+ RETURN_ON_VULKAN_ERROR(vkAllocateMemory(device, &memoryAllocateInfo, 0,
+ &memoryBuffer.hostMemory),
+ "vkAllocateMemory");
+ memoryAllocateInfo.memoryTypeIndex = deviceMemoryTypeIndex;
RETURN_ON_VULKAN_ERROR(vkAllocateMemory(device, &memoryAllocateInfo, 0,
&memoryBuffer.deviceMemory),
"vkAllocateMemory");
void *payload;
- RETURN_ON_VULKAN_ERROR(vkMapMemory(device, memoryBuffer.deviceMemory, 0,
+ RETURN_ON_VULKAN_ERROR(vkMapMemory(device, memoryBuffer.hostMemory, 0,
bufferSize, 0,
reinterpret_cast<void **>(&payload)),
"vkMapMemory");
// Copy host memory into the mapped area.
std::memcpy(payload, resourceDataBindingPair.second.ptr, bufferSize);
- vkUnmapMemory(device, memoryBuffer.deviceMemory);
+ vkUnmapMemory(device, memoryBuffer.hostMemory);
VkBufferCreateInfo bufferCreateInfo = {};
bufferCreateInfo.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
@@ -432,17 +456,24 @@ LogicalResult VulkanRuntime::createMemoryBuffers() {
bufferCreateInfo.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
bufferCreateInfo.queueFamilyIndexCount = 1;
bufferCreateInfo.pQueueFamilyIndices = &queueFamilyIndex;
- RETURN_ON_VULKAN_ERROR(
- vkCreateBuffer(device, &bufferCreateInfo, 0, &memoryBuffer.buffer),
- "vkCreateBuffer");
+ RETURN_ON_VULKAN_ERROR(vkCreateBuffer(device, &bufferCreateInfo, 0,
+ &memoryBuffer.hostBuffer),
+ "vkCreateBuffer");
+ RETURN_ON_VULKAN_ERROR(vkCreateBuffer(device, &bufferCreateInfo, 0,
+ &memoryBuffer.deviceBuffer),
+ "vkCreateBuffer");
// Bind buffer and device memory.
- RETURN_ON_VULKAN_ERROR(vkBindBufferMemory(device, memoryBuffer.buffer,
+ RETURN_ON_VULKAN_ERROR(vkBindBufferMemory(device, memoryBuffer.hostBuffer,
+ memoryBuffer.hostMemory, 0),
+ "vkBindBufferMemory");
+ RETURN_ON_VULKAN_ERROR(vkBindBufferMemory(device,
+ memoryBuffer.deviceBuffer,
memoryBuffer.deviceMemory, 0),
"vkBindBufferMemory");
// Update buffer info.
- memoryBuffer.bufferInfo.buffer = memoryBuffer.buffer;
+ memoryBuffer.bufferInfo.buffer = memoryBuffer.deviceBuffer;
memoryBuffer.bufferInfo.offset = 0;
memoryBuffer.bufferInfo.range = VK_WHOLE_SIZE;
deviceMemoryBuffers.push_back(memoryBuffer);
@@ -454,6 +485,66 @@ LogicalResult VulkanRuntime::createMemoryBuffers() {
return success();
}
+LogicalResult VulkanRuntime::copyResource(bool deviceToHost) {
+ VkCommandBufferAllocateInfo commandBufferAllocateInfo = {
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
+ NULL,
+ commandPool,
+ VK_COMMAND_BUFFER_LEVEL_PRIMARY,
+ 1,
+ };
+ VkCommandBuffer commandBuffer;
+ RETURN_ON_VULKAN_ERROR(vkAllocateCommandBuffers(device,
+ &commandBufferAllocateInfo,
+ &commandBuffer),
+ "vkAllocateCommandBuffers");
+
+ VkCommandBufferBeginInfo commandBufferBeginInfo = {
+ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+ NULL,
+ 0,
+ NULL,
+ };
+ RETURN_ON_VULKAN_ERROR(
+ vkBeginCommandBuffer(commandBuffer, &commandBufferBeginInfo),
+ "vkBeginCommandBuffer");
+
+ for (const auto &deviceMemoryBufferMapPair : deviceMemoryBufferMap) {
+ std::vector<VkDescriptorSetLayoutBinding> descriptorSetLayoutBindings;
+ const auto &deviceMemoryBuffers = deviceMemoryBufferMapPair.second;
+ for (const auto &memBuffer : deviceMemoryBuffers) {
+ VkBufferCopy copy = {0, 0, memBuffer.bufferSize};
+ if (deviceToHost)
+ vkCmdCopyBuffer(commandBuffer, memBuffer.deviceBuffer,
+ memBuffer.hostBuffer, 1, ©);
+ else
+ vkCmdCopyBuffer(commandBuffer, memBuffer.hostBuffer,
+ memBuffer.deviceBuffer, 1, ©);
+ }
+ }
+
+ RETURN_ON_VULKAN_ERROR(vkEndCommandBuffer(commandBuffer),
+ "vkEndCommandBuffer");
+ VkSubmitInfo submitInfo = {
+ VK_STRUCTURE_TYPE_SUBMIT_INFO,
+ NULL,
+ 0,
+ NULL,
+ NULL,
+ 1,
+ &commandBuffer,
+ 0,
+ NULL,
+ };
+ submitInfo.pCommandBuffers = &commandBuffer;
+ RETURN_ON_VULKAN_ERROR(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE),
+ "vkQueueSubmit");
+ RETURN_ON_VULKAN_ERROR(vkQueueWaitIdle(queue), "vkQueueWaitIdle");
+
+ vkFreeCommandBuffers(device, commandPool, 1, &commandBuffer);
+ return success();
+}
+
LogicalResult VulkanRuntime::createShaderModule() {
VkShaderModuleCreateInfo shaderModuleCreateInfo = {};
shaderModuleCreateInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
@@ -762,6 +853,9 @@ LogicalResult VulkanRuntime::submitCommandBuffersToQueue() {
}
LogicalResult VulkanRuntime::updateHostMemoryBuffers() {
+ // First copy back the data to the staging buffer.
+ copyResource(/*deviceToHost=*/true);
+
// For each descriptor set.
for (auto &resourceDataMapPair : resourceData) {
auto &resourceDataMap = resourceDataMapPair.second;
@@ -774,12 +868,12 @@ LogicalResult VulkanRuntime::updateHostMemoryBuffers() {
auto &hostMemoryBuffer =
resourceDataMap[deviceMemoryBuffer.bindingIndex];
RETURN_ON_VULKAN_ERROR(vkMapMemory(device,
- deviceMemoryBuffer.deviceMemory, 0,
+ deviceMemoryBuffer.hostMemory, 0,
hostMemoryBuffer.size, 0,
reinterpret_cast<void **>(&payload)),
"vkMapMemory");
std::memcpy(hostMemoryBuffer.ptr, payload, hostMemoryBuffer.size);
- vkUnmapMemory(device, deviceMemoryBuffer.deviceMemory);
+ vkUnmapMemory(device, deviceMemoryBuffer.hostMemory);
}
}
}
diff --git a/mlir/tools/mlir-vulkan-runner/VulkanRuntime.h b/mlir/tools/mlir-vulkan-runner/VulkanRuntime.h
index 7efc3d6359a0..9fa52b00a0ac 100644
--- a/mlir/tools/mlir-vulkan-runner/VulkanRuntime.h
+++ b/mlir/tools/mlir-vulkan-runner/VulkanRuntime.h
@@ -29,8 +29,11 @@ struct VulkanDeviceMemoryBuffer {
BindingIndex bindingIndex{0};
VkDescriptorType descriptorType{VK_DESCRIPTOR_TYPE_MAX_ENUM};
VkDescriptorBufferInfo bufferInfo{};
- VkBuffer buffer{VK_NULL_HANDLE};
+ VkBuffer hostBuffer{VK_NULL_HANDLE};
+ VkDeviceMemory hostMemory{VK_NULL_HANDLE};
+ VkBuffer deviceBuffer{VK_NULL_HANDLE};
VkDeviceMemory deviceMemory{VK_NULL_HANDLE};
+ uint32_t bufferSize{0};
};
/// Struct containing information regarding to a host memory buffer.
@@ -137,6 +140,9 @@ class VulkanRuntime {
LogicalResult createQueryPool();
LogicalResult createComputeCommandBuffer();
LogicalResult submitCommandBuffersToQueue();
+ // Copy resources from host (staging buffer) to device buffer or from device
+ // buffer to host buffer.
+ LogicalResult copyResource(bool deviceToHost);
//===--------------------------------------------------------------------===//
// Helper methods.
@@ -203,7 +209,8 @@ class VulkanRuntime {
uint32_t queueFamilyIndex{0};
VkQueueFamilyProperties queueFamilyProperties{};
- uint32_t memoryTypeIndex{VK_MAX_MEMORY_TYPES};
+ uint32_t hostMemoryTypeIndex{VK_MAX_MEMORY_TYPES};
+ uint32_t deviceMemoryTypeIndex{VK_MAX_MEMORY_TYPES};
VkDeviceSize memorySize{0};
//===--------------------------------------------------------------------===//
More information about the Mlir-commits
mailing list