[Mlir-commits] [mlir] [MLIR][GPU-LLVM] Convert `gpu.func` to `llvm.func` (PR #101664)
Victor Perez
llvmlistbot at llvm.org
Fri Aug 2 05:45:27 PDT 2024
https://github.com/victor-eds created https://github.com/llvm/llvm-project/pull/101664
Add support in `-convert-gpu-to-llvm-spv` to convert `gpu.func` to `llvm.func` operations.
- `spir_kernel`/`spir_func` calling conventions used for kernels/functions.
- `workgroup` attributions encoded as additional `llvm.ptr<3>` arguments.
- No attribute is used to annotate kernels.
- `reqd_work_group_size` attribute used to encode `gpu.known_block_size`.
>From 08332a6f1fddf9c5d161f3b79934df2cae5de11a Mon Sep 17 00:00:00 2001
From: Victor Perez <victor.perez at codeplay.com>
Date: Mon, 29 Jul 2024 13:28:47 +0100
Subject: [PATCH] [MLIR][GPU-LLVM] Convert `gpu.func` to `llvm.func`
Add support in `-convert-gpu-to-llvm-spv` to convert `gpu.func` to
`llvm.func` operations.
- `spir_kernel`/`spir_func` calling conventions used for
kernels/functions.
- `workgroup` attributions encoded as additional `llvm.ptr<3>`
arguments.
- No attribute is used to annotate kernels.
- `reqd_work_group_size` attribute used to encode
`gpu.known_block_size`.
**Note**: A notable missing feature that will be addressed in a
follow-up PR is a `-use-bare-ptr-memref-call-conv` option to replace
MemRef arguments with bare pointers to the MemRef element types
instead of the current MemRef descriptor approach.
Signed-off-by: Victor Perez <victor.perez at codeplay.com>
---
.../SPIRVCommon/AttrToLLVMConverter.h | 18 ++
mlir/lib/Conversion/CMakeLists.txt | 1 +
.../Conversion/GPUCommon/GPUOpsLowering.cpp | 144 ++++++---
.../lib/Conversion/GPUCommon/GPUOpsLowering.h | 51 +++-
.../Conversion/GPUToLLVMSPV/CMakeLists.txt | 2 +
.../Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 25 +-
.../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp | 16 +-
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 9 +-
.../SPIRVCommon/AttrToLLVMConverter.cpp | 61 ++++
.../lib/Conversion/SPIRVCommon/CMakeLists.txt | 6 +
.../lib/Conversion/SPIRVToLLVM/CMakeLists.txt | 1 +
.../Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp | 47 +--
.../GPUToLLVMSPV/gpu-to-llvm-spv.mlir | 285 ++++++++++++++++++
13 files changed, 556 insertions(+), 110 deletions(-)
create mode 100644 mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h
create mode 100644 mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp
create mode 100644 mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt
diff --git a/mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h b/mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h
new file mode 100644
index 0000000000000..a99dd0fe6f133
--- /dev/null
+++ b/mlir/include/mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h
@@ -0,0 +1,18 @@
+//===- AttrToLLVMConverter.h - SPIR-V attributes conversion to LLVM - C++ -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_
+#define MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_
+
+#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
+
+namespace mlir {
+unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI,
+ spirv::StorageClass storageClass);
+} // namespace mlir
+
+#endif // MLIR_CONVERSION_SPIRVCOMMON_ATTRTOLLVMCONVERTER_H_
diff --git a/mlir/lib/Conversion/CMakeLists.txt b/mlir/lib/Conversion/CMakeLists.txt
index 80c8b84d9ae89..813f700c5556e 100644
--- a/mlir/lib/Conversion/CMakeLists.txt
+++ b/mlir/lib/Conversion/CMakeLists.txt
@@ -53,6 +53,7 @@ add_subdirectory(SCFToGPU)
add_subdirectory(SCFToOpenMP)
add_subdirectory(SCFToSPIRV)
add_subdirectory(ShapeToStandard)
+add_subdirectory(SPIRVCommon)
add_subdirectory(SPIRVToLLVM)
add_subdirectory(TensorToLinalg)
add_subdirectory(TensorToSPIRV)
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 6053e34f30a41..0007294b3ff27 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -25,29 +25,58 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
Location loc = gpuFuncOp.getLoc();
SmallVector<LLVM::GlobalOp, 3> workgroupBuffers;
- workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
- for (const auto [idx, attribution] :
- llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
- auto type = dyn_cast<MemRefType>(attribution.getType());
- assert(type && type.hasStaticShape() && "unexpected type in attribution");
-
- uint64_t numElements = type.getNumElements();
-
- auto elementType =
- cast<Type>(typeConverter->convertType(type.getElementType()));
- auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
- std::string name =
- std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
- uint64_t alignment = 0;
- if (auto alignAttr =
- dyn_cast_or_null<IntegerAttr>(gpuFuncOp.getWorkgroupAttributionAttr(
- idx, LLVM::LLVMDialect::getAlignAttrName())))
- alignment = alignAttr.getInt();
- auto globalOp = rewriter.create<LLVM::GlobalOp>(
- gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
- LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
- workgroupAddrSpace);
- workgroupBuffers.push_back(globalOp);
+ if (encodeWorkgroupAttributionsAsArguments) {
+ ArrayRef<BlockArgument> workgroupAttributions =
+ gpuFuncOp.getWorkgroupAttributions();
+ std::size_t numAttributions = workgroupAttributions.size();
+
+ // Insert all arguments at the end.
+ unsigned index = gpuFuncOp.getNumArguments();
+ SmallVector<unsigned> argIndices(numAttributions, index);
+
+ // New arguments will simply be `llvm.ptr` with the correct address space
+ Type workgroupPtrType =
+ rewriter.getType<LLVM::LLVMPointerType>(workgroupAddrSpace);
+ SmallVector<Type> argTypes(numAttributions, workgroupPtrType);
+
+ // No argument attributes will be added
+ DictionaryAttr emptyDict = rewriter.getDictionaryAttr({});
+ SmallVector<DictionaryAttr> argAttrs(numAttributions, emptyDict);
+
+ // Location match function location
+ SmallVector<Location> argLocs(numAttributions, gpuFuncOp.getLoc());
+
+ // Perform signature modification
+ rewriter.modifyOpInPlace(
+ gpuFuncOp, [gpuFuncOp, &argIndices, &argTypes, &argAttrs, &argLocs]() {
+ static_cast<FunctionOpInterface>(gpuFuncOp).insertArguments(
+ argIndices, argTypes, argAttrs, argLocs);
+ });
+ } else {
+ workgroupBuffers.reserve(gpuFuncOp.getNumWorkgroupAttributions());
+ for (const auto [idx, attribution] :
+ llvm::enumerate(gpuFuncOp.getWorkgroupAttributions())) {
+ auto type = dyn_cast<MemRefType>(attribution.getType());
+ assert(type && type.hasStaticShape() && "unexpected type in attribution");
+
+ uint64_t numElements = type.getNumElements();
+
+ auto elementType =
+ cast<Type>(typeConverter->convertType(type.getElementType()));
+ auto arrayType = LLVM::LLVMArrayType::get(elementType, numElements);
+ std::string name =
+ std::string(llvm::formatv("__wg_{0}_{1}", gpuFuncOp.getName(), idx));
+ uint64_t alignment = 0;
+ if (auto alignAttr = dyn_cast_or_null<IntegerAttr>(
+ gpuFuncOp.getWorkgroupAttributionAttr(
+ idx, LLVM::LLVMDialect::getAlignAttrName())))
+ alignment = alignAttr.getInt();
+ auto globalOp = rewriter.create<LLVM::GlobalOp>(
+ gpuFuncOp.getLoc(), arrayType, /*isConstant=*/false,
+ LLVM::Linkage::Internal, name, /*value=*/Attribute(), alignment,
+ workgroupAddrSpace);
+ workgroupBuffers.push_back(globalOp);
+ }
}
// Remap proper input types.
@@ -101,16 +130,20 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
// attribute. The former is necessary for further translation while the
// latter is expected by gpu.launch_func.
if (gpuFuncOp.isKernel()) {
- attributes.emplace_back(kernelAttributeName, rewriter.getUnitAttr());
+ if (kernelAttributeName)
+ attributes.emplace_back(*kernelAttributeName, rewriter.getUnitAttr());
// Set the dialect-specific block size attribute if there is one.
if (kernelBlockSizeAttributeName.has_value() && knownBlockSize) {
attributes.emplace_back(kernelBlockSizeAttributeName.value(),
knownBlockSize);
}
}
+ LLVM::CConv callingConvention = gpuFuncOp.isKernel()
+ ? kernelCallingConvention
+ : nonKernelCallingConvention;
auto llvmFuncOp = rewriter.create<LLVM::LLVMFuncOp>(
gpuFuncOp.getLoc(), gpuFuncOp.getName(), funcType,
- LLVM::Linkage::External, /*dsoLocal=*/false, /*cconv=*/LLVM::CConv::C,
+ LLVM::Linkage::External, /*dsoLocal=*/false, callingConvention,
/*comdat=*/nullptr, attributes);
{
@@ -125,24 +158,49 @@ GPUFuncOpLowering::matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
rewriter.setInsertionPointToStart(&gpuFuncOp.front());
unsigned numProperArguments = gpuFuncOp.getNumArguments();
- for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
- auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
- global.getAddrSpace());
- Value address = rewriter.create<LLVM::AddressOfOp>(
- loc, ptrType, global.getSymNameAttr());
- Value memory =
- rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(), address,
- ArrayRef<LLVM::GEPArg>{0, 0});
-
- // Build a memref descriptor pointing to the buffer to plug with the
- // existing memref infrastructure. This may use more registers than
- // otherwise necessary given that memref sizes are fixed, but we can try
- // and canonicalize that away later.
- Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
- auto type = cast<MemRefType>(attribution.getType());
- auto descr = MemRefDescriptor::fromStaticShape(
- rewriter, loc, *getTypeConverter(), type, memory);
- signatureConversion.remapInput(numProperArguments + idx, descr);
+ if (encodeWorkgroupAttributionsAsArguments) {
+ unsigned numAttributions = gpuFuncOp.getNumWorkgroupAttributions();
+ assert(numProperArguments >= numAttributions &&
+ "Expecting attributions to be encoded as arguments already");
+
+ // Arguments encoding workgroup attributions will be in positions
+ // [numProperArguments, numProperArguments+numAttributions)
+ ArrayRef<BlockArgument> attributionArguments =
+ gpuFuncOp.getArguments().slice(numProperArguments - numAttributions,
+ numAttributions);
+ for (auto [idx, vals] : llvm::enumerate(llvm::zip_equal(
+ gpuFuncOp.getWorkgroupAttributions(), attributionArguments))) {
+ auto [attribution, arg] = vals;
+ auto type = cast<MemRefType>(attribution.getType());
+
+ // Arguments are of llvm.ptr type and attributions are of memref type:
+ // we need to wrap them in memref descriptors.
+ Value descr = MemRefDescriptor::fromStaticShape(
+ rewriter, loc, *getTypeConverter(), type, arg);
+
+ // And remap the arguments
+ signatureConversion.remapInput(numProperArguments + idx, descr);
+ }
+ } else {
+ for (const auto [idx, global] : llvm::enumerate(workgroupBuffers)) {
+ auto ptrType = LLVM::LLVMPointerType::get(rewriter.getContext(),
+ global.getAddrSpace());
+ Value address = rewriter.create<LLVM::AddressOfOp>(
+ loc, ptrType, global.getSymNameAttr());
+ Value memory =
+ rewriter.create<LLVM::GEPOp>(loc, ptrType, global.getType(),
+ address, ArrayRef<LLVM::GEPArg>{0, 0});
+
+ // Build a memref descriptor pointing to the buffer to plug with the
+ // existing memref infrastructure. This may use more registers than
+ // otherwise necessary given that memref sizes are fixed, but we can try
+ // and canonicalize that away later.
+ Value attribution = gpuFuncOp.getWorkgroupAttributions()[idx];
+ auto type = cast<MemRefType>(attribution.getType());
+ auto descr = MemRefDescriptor::fromStaticShape(
+ rewriter, loc, *getTypeConverter(), type, memory);
+ signatureConversion.remapInput(numProperArguments + idx, descr);
+ }
}
// Rewrite private memory attributions to alloca'ed buffers.
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
index 92e69badc27dd..781bea6b09406 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -35,16 +35,39 @@ struct GPUDynamicSharedMemoryOpLowering
unsigned alignmentBit;
};
+struct GPUFuncOpLoweringOptions {
+ /// The address space to use for `alloca`s in private memory.
+ unsigned allocaAddrSpace;
+ /// The address space to use declaring workgroup memory.
+ unsigned workgroupAddrSpace;
+
+ /// The attribute name to use instead of `gpu.kernel`.
+ std::optional<StringAttr> kernelAttributeName = std::nullopt;
+ /// The attribute name to to set block size
+ std::optional<StringAttr> kernelBlockSizeAttributeName = std::nullopt;
+
+ /// The calling convention to use for kernel functions
+ LLVM::CConv kernelCallingConvention = LLVM::CConv::C;
+ /// The calling convention to use for non-kernel functions
+ LLVM::CConv nonKernelCallingConvention = LLVM::CConv::C;
+
+ /// Whether to encode workgroup attributions as additional arguments instead
+ /// of a global variable.
+ bool encodeWorkgroupAttributionsAsArguments = false;
+};
+
struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
- GPUFuncOpLowering(
- const LLVMTypeConverter &converter, unsigned allocaAddrSpace,
- unsigned workgroupAddrSpace, StringAttr kernelAttributeName,
- std::optional<StringAttr> kernelBlockSizeAttributeName = std::nullopt)
+ GPUFuncOpLowering(const LLVMTypeConverter &converter,
+ const GPUFuncOpLoweringOptions &options)
: ConvertOpToLLVMPattern<gpu::GPUFuncOp>(converter),
- allocaAddrSpace(allocaAddrSpace),
- workgroupAddrSpace(workgroupAddrSpace),
- kernelAttributeName(kernelAttributeName),
- kernelBlockSizeAttributeName(kernelBlockSizeAttributeName) {}
+ allocaAddrSpace(options.allocaAddrSpace),
+ workgroupAddrSpace(options.workgroupAddrSpace),
+ kernelAttributeName(options.kernelAttributeName),
+ kernelBlockSizeAttributeName(options.kernelBlockSizeAttributeName),
+ kernelCallingConvention(options.kernelCallingConvention),
+ nonKernelCallingConvention(options.nonKernelCallingConvention),
+ encodeWorkgroupAttributionsAsArguments(
+ options.encodeWorkgroupAttributionsAsArguments) {}
LogicalResult
matchAndRewrite(gpu::GPUFuncOp gpuFuncOp, OpAdaptor adaptor,
@@ -57,10 +80,18 @@ struct GPUFuncOpLowering : ConvertOpToLLVMPattern<gpu::GPUFuncOp> {
unsigned workgroupAddrSpace;
/// The attribute name to use instead of `gpu.kernel`.
- StringAttr kernelAttributeName;
-
+ std::optional<StringAttr> kernelAttributeName;
/// The attribute name to to set block size
std::optional<StringAttr> kernelBlockSizeAttributeName;
+
+ /// The calling convention to use for kernel functions
+ LLVM::CConv kernelCallingConvention;
+ /// The calling convention to use for non-kernel functions
+ LLVM::CConv nonKernelCallingConvention;
+
+ /// Whether to encode workgroup attributions as additional arguments instead
+ /// of a global variable.
+ bool encodeWorkgroupAttributionsAsArguments;
};
/// The lowering of gpu.printf to a call to HIP hostcalls
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
index da5650b2b68dd..d47c5e679d86e 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/CMakeLists.txt
@@ -6,7 +6,9 @@ add_mlir_conversion_library(MLIRGPUToLLVMSPV
LINK_LIBS PUBLIC
MLIRGPUDialect
+ MLIRGPUToGPURuntimeTransforms
MLIRLLVMCommonConversion
MLIRLLVMDialect
+ MLIRSPIRVAttrToLLVMConversion
MLIRSPIRVDialect
)
diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
index 27d63b5f8948d..74dd5f19c20f5 100644
--- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
+++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp
@@ -8,15 +8,18 @@
#include "mlir/Conversion/GPUToLLVMSPV/GPUToLLVMSPVPass.h"
+#include "../GPUCommon/GPUOpsLowering.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
+#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
#include "mlir/Dialect/SPIRV/IR/TargetAndABI.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Matchers.h"
@@ -321,8 +324,8 @@ struct GPUToLLVMSPVConversionPass final
LLVMConversionTarget target(*context);
target.addIllegalOp<gpu::BarrierOp, gpu::BlockDimOp, gpu::BlockIdOp,
- gpu::GlobalIdOp, gpu::GridDimOp, gpu::ShuffleOp,
- gpu::ThreadIdOp>();
+ gpu::GPUFuncOp, gpu::GlobalIdOp, gpu::GridDimOp,
+ gpu::ReturnOp, gpu::ShuffleOp, gpu::ThreadIdOp>();
populateGpuToLLVMSPVConversionPatterns(converter, patterns);
@@ -340,11 +343,27 @@ struct GPUToLLVMSPVConversionPass final
namespace mlir {
void populateGpuToLLVMSPVConversionPatterns(LLVMTypeConverter &typeConverter,
RewritePatternSet &patterns) {
- patterns.add<GPUBarrierConversion, GPUShuffleConversion,
+ patterns.add<GPUBarrierConversion, GPUReturnOpLowering, GPUShuffleConversion,
LaunchConfigOpConversion<gpu::BlockIdOp>,
LaunchConfigOpConversion<gpu::GridDimOp>,
LaunchConfigOpConversion<gpu::BlockDimOp>,
LaunchConfigOpConversion<gpu::ThreadIdOp>,
LaunchConfigOpConversion<gpu::GlobalIdOp>>(typeConverter);
+ constexpr spirv::ClientAPI clientAPI = spirv::ClientAPI::OpenCL;
+ MLIRContext *context = &typeConverter.getContext();
+ unsigned privateAddressSpace =
+ storageClassToAddressSpace(clientAPI, spirv::StorageClass::Function);
+ unsigned localAddressSpace =
+ storageClassToAddressSpace(clientAPI, spirv::StorageClass::Workgroup);
+ OperationName llvmFuncOpName(LLVM::LLVMFuncOp::getOperationName(), context);
+ StringAttr kernelBlockSizeAttributeName =
+ LLVM::LLVMFuncOp::getReqdWorkGroupSizeAttrName(llvmFuncOpName);
+ patterns.add<GPUFuncOpLowering>(
+ typeConverter,
+ GPUFuncOpLoweringOptions{
+ privateAddressSpace, localAddressSpace,
+ /*kernelAttributeName=*/std::nullopt, kernelBlockSizeAttributeName,
+ LLVM::CConv::SPIR_KERNEL, LLVM::CConv::SPIR_FUNC,
+ /*encodeWorkgroupAttributionsAsArguments=*/true});
}
} // namespace mlir
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index faa97caacb885..060a1e1e82f75 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -365,13 +365,15 @@ void mlir::populateGpuToNVVMConversionPatterns(LLVMTypeConverter &converter,
// attributions since NVVM models it as `alloca`s in the default
// memory space and does not support `alloca`s with addrspace(5).
patterns.add<GPUFuncOpLowering>(
- converter, /*allocaAddrSpace=*/0,
- /*workgroupAddrSpace=*/
- static_cast<unsigned>(NVVM::NVVMMemorySpace::kSharedMemorySpace),
- StringAttr::get(&converter.getContext(),
- NVVM::NVVMDialect::getKernelFuncAttrName()),
- StringAttr::get(&converter.getContext(),
- NVVM::NVVMDialect::getMaxntidAttrName()));
+ converter,
+ GPUFuncOpLoweringOptions{
+ /*allocaAddrSpace=*/0,
+ /*workgroupAddrSpace=*/
+ static_cast<unsigned>(NVVM::NVVMMemorySpace::kSharedMemorySpace),
+ StringAttr::get(&converter.getContext(),
+ NVVM::NVVMDialect::getKernelFuncAttrName()),
+ StringAttr::get(&converter.getContext(),
+ NVVM::NVVMDialect::getMaxntidAttrName())});
populateOpPatterns<arith::RemFOp>(converter, patterns, "__nv_fmodf",
"__nv_fmod");
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 100181cdc69fe..564bab1ad92b9 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -372,10 +372,11 @@ void mlir::populateGpuToROCDLConversionPatterns(
patterns.add<GPUReturnOpLowering>(converter);
patterns.add<GPUFuncOpLowering>(
converter,
- /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
- /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
- rocdlDialect->getKernelAttrHelper().getName(),
- rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName());
+ GPUFuncOpLoweringOptions{
+ /*allocaAddrSpace=*/ROCDL::ROCDLDialect::kPrivateMemoryAddressSpace,
+ /*workgroupAddrSpace=*/ROCDL::ROCDLDialect::kSharedMemoryAddressSpace,
+ rocdlDialect->getKernelAttrHelper().getName(),
+ rocdlDialect->getReqdWorkGroupSizeAttrHelper().getName()});
if (Runtime::HIP == runtime) {
patterns.add<GPUPrintfOpToHIPLowering>(converter);
} else if (Runtime::OpenCL == runtime) {
diff --git a/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp b/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp
new file mode 100644
index 0000000000000..924bd1643f83b
--- /dev/null
+++ b/mlir/lib/Conversion/SPIRVCommon/AttrToLLVMConverter.cpp
@@ -0,0 +1,61 @@
+//===- AttrToLLVMConverter.cpp - SPIR-V attributes conversion to LLVM -C++ ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include <mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h>
+
+namespace {
+using namespace mlir;
+
+//===----------------------------------------------------------------------===//
+// Constants
+//===----------------------------------------------------------------------===//
+
+constexpr unsigned defaultAddressSpace = 0;
+
+//===----------------------------------------------------------------------===//
+// Utility functions
+//===----------------------------------------------------------------------===//
+
+static unsigned
+storageClassToOCLAddressSpace(spirv::StorageClass storageClass) {
+ // Based on
+ // https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_binary_form
+ // and clang/lib/Basic/Targets/SPIR.h.
+ switch (storageClass) {
+ case spirv::StorageClass::Function:
+ return 0;
+ case spirv::StorageClass::Input:
+ case spirv::StorageClass::CrossWorkgroup:
+ return 1;
+ case spirv::StorageClass::UniformConstant:
+ return 2;
+ case spirv::StorageClass::Workgroup:
+ return 3;
+ case spirv::StorageClass::Generic:
+ return 4;
+ case spirv::StorageClass::DeviceOnlyINTEL:
+ return 5;
+ case spirv::StorageClass::HostOnlyINTEL:
+ return 6;
+ default:
+ return defaultAddressSpace;
+ }
+}
+} // namespace
+
+namespace mlir {
+unsigned storageClassToAddressSpace(spirv::ClientAPI clientAPI,
+ spirv::StorageClass storageClass) {
+ switch (clientAPI) {
+ case spirv::ClientAPI::OpenCL:
+ return storageClassToOCLAddressSpace(storageClass);
+ default:
+ return defaultAddressSpace;
+ }
+}
+} // namespace mlir
diff --git a/mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt b/mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt
new file mode 100644
index 0000000000000..cd5a4c225efbf
--- /dev/null
+++ b/mlir/lib/Conversion/SPIRVCommon/CMakeLists.txt
@@ -0,0 +1,6 @@
+add_mlir_conversion_library(MLIRSPIRVAttrToLLVMConversion
+ AttrToLLVMConverter.cpp
+
+ DEPENDS
+ MLIRSPIRVEnumsIncGen
+)
diff --git a/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt b/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
index 549785b154c1b..e563315d95c9c 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
+++ b/mlir/lib/Conversion/SPIRVToLLVM/CMakeLists.txt
@@ -18,6 +18,7 @@ add_mlir_conversion_library(MLIRSPIRVToLLVM
MLIRLLVMCommonConversion
MLIRLLVMDialect
MLIRMemRefToLLVM
+ MLIRSPIRVAttrToLLVMConversion
MLIRSPIRVDialect
MLIRSPIRVUtils
MLIRTransforms
diff --git a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
index da09384bfbe89..ca78631632419 100644
--- a/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
+++ b/mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp
@@ -13,6 +13,7 @@
#include "mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/SPIRVCommon/AttrToLLVMConverter.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVDialect.h"
#include "mlir/Dialect/SPIRV/IR/SPIRVEnums.h"
@@ -28,12 +29,6 @@
using namespace mlir;
-//===----------------------------------------------------------------------===//
-// Constants
-//===----------------------------------------------------------------------===//
-
-constexpr unsigned defaultAddressSpace = 0;
-
//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//
@@ -273,47 +268,13 @@ static std::optional<Type> convertArrayType(spirv::ArrayType type,
return LLVM::LLVMArrayType::get(llvmElementType, numElements);
}
-static unsigned mapToOpenCLAddressSpace(spirv::StorageClass storageClass) {
- // Based on
- // https://registry.khronos.org/SPIR-V/specs/unified1/OpenCL.ExtendedInstructionSet.100.html#_binary_form
- // and clang/lib/Basic/Targets/SPIR.h.
- switch (storageClass) {
-#define STORAGE_SPACE_MAP(storage, space) \
- case spirv::StorageClass::storage: \
- return space;
- STORAGE_SPACE_MAP(Function, 0)
- STORAGE_SPACE_MAP(CrossWorkgroup, 1)
- STORAGE_SPACE_MAP(Input, 1)
- STORAGE_SPACE_MAP(UniformConstant, 2)
- STORAGE_SPACE_MAP(Workgroup, 3)
- STORAGE_SPACE_MAP(Generic, 4)
- STORAGE_SPACE_MAP(DeviceOnlyINTEL, 5)
- STORAGE_SPACE_MAP(HostOnlyINTEL, 6)
-#undef STORAGE_SPACE_MAP
- default:
- return defaultAddressSpace;
- }
-}
-
-static unsigned mapToAddressSpace(spirv::ClientAPI clientAPI,
- spirv::StorageClass storageClass) {
- switch (clientAPI) {
-#define CLIENT_MAP(client, storage) \
- case spirv::ClientAPI::client: \
- return mapTo##client##AddressSpace(storage);
- CLIENT_MAP(OpenCL, storageClass)
-#undef CLIENT_MAP
- default:
- return defaultAddressSpace;
- }
-}
-
/// Converts SPIR-V pointer type to LLVM pointer. Pointer's storage class is not
/// modelled at the moment.
static Type convertPointerType(spirv::PointerType type,
LLVMTypeConverter &converter,
spirv::ClientAPI clientAPI) {
- unsigned addressSpace = mapToAddressSpace(clientAPI, type.getStorageClass());
+ unsigned addressSpace =
+ storageClassToAddressSpace(clientAPI, type.getStorageClass());
return LLVM::LLVMPointerType::get(type.getContext(), addressSpace);
}
@@ -822,7 +783,7 @@ class GlobalVariablePattern
: LLVM::Linkage::External;
auto newGlobalOp = rewriter.replaceOpWithNewOp<LLVM::GlobalOp>(
op, dstType, isConstant, linkage, op.getSymName(), Attribute(),
- /*alignment=*/0, mapToAddressSpace(clientAPI, storageClass));
+ /*alignment=*/0, storageClassToAddressSpace(clientAPI, storageClass));
// Attach location attribute if applicable
if (op.getLocationAttr())
diff --git a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
index bd7e5d139b001..ce3cc9a6137d3 100644
--- a/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
+++ b/mlir/test/Conversion/GPUToLLVMSPV/gpu-to-llvm-spv.mlir
@@ -377,3 +377,288 @@ gpu.module @shuffles_mismatch {
return
}
}
+
+// -----
+
+gpu.module @kernels {
+// CHECK: llvm.func spir_funccc @no_kernel() {
+ gpu.func @no_kernel() {
+ gpu.return
+ }
+
+// CHECK: llvm.func spir_kernelcc @kernel_no_arg() attributes {gpu.kernel} {
+ gpu.func @kernel_no_arg() kernel {
+ gpu.return
+ }
+
+// CHECK: llvm.func spir_kernelcc @kernel_with_args(%[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i64) attributes {gpu.kernel} {
+ gpu.func @kernel_with_args(%arg0: f32, %arg1: i64) kernel {
+ gpu.return
+ }
+
+// CHECK-64: llvm.func spir_kernelcc @kernel_with_conv_args(%[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: i64) attributes {gpu.kernel} {
+// CHECK-32: llvm.func spir_kernelcc @kernel_with_conv_args(%[[VAL_2:.*]]: i32, %[[VAL_3:.*]]: !llvm.ptr, %[[VAL_4:.*]]: !llvm.ptr, %[[VAL_5:.*]]: i32) attributes {gpu.kernel} {
+ gpu.func @kernel_with_conv_args(%arg0: index, %arg1: memref<index>) kernel {
+ gpu.return
+ }
+
+// CHECK-64: llvm.func spir_kernelcc @kernel_with_sized_memref(%[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr, %[[VAL_8:.*]]: i64, %[[VAL_9:.*]]: i64, %[[VAL_10:.*]]: i64) attributes {gpu.kernel} {
+// CHECK-32: llvm.func spir_kernelcc @kernel_with_sized_memref(%[[VAL_6:.*]]: !llvm.ptr, %[[VAL_7:.*]]: !llvm.ptr, %[[VAL_8:.*]]: i32, %[[VAL_9:.*]]: i32, %[[VAL_10:.*]]: i32) attributes {gpu.kernel} {
+ gpu.func @kernel_with_sized_memref(%arg0: memref<1xindex>) kernel {
+ gpu.return
+ }
+
+// CHECK-64: llvm.func spir_kernelcc @kernel_with_ND_memref(%[[VAL_11:.*]]: !llvm.ptr, %[[VAL_12:.*]]: !llvm.ptr, %[[VAL_13:.*]]: i64, %[[VAL_14:.*]]: i64, %[[VAL_15:.*]]: i64, %[[VAL_16:.*]]: i64, %[[VAL_17:.*]]: i64, %[[VAL_18:.*]]: i64, %[[VAL_19:.*]]: i64) attributes {gpu.kernel} {
+// CHECK-32: llvm.func spir_kernelcc @kernel_with_ND_memref(%[[VAL_11:.*]]: !llvm.ptr, %[[VAL_12:.*]]: !llvm.ptr, %[[VAL_13:.*]]: i32, %[[VAL_14:.*]]: i32, %[[VAL_15:.*]]: i32, %[[VAL_16:.*]]: i32, %[[VAL_17:.*]]: i32, %[[VAL_18:.*]]: i32, %[[VAL_19:.*]]: i32) attributes {gpu.kernel} {
+ gpu.func @kernel_with_ND_memref(%arg0: memref<128x128x128xindex>) kernel {
+ gpu.return
+ }
+}
+
+// -----
+
+gpu.module @kernels {
+// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_private_attribs(
+// CHECK-SAME: %[[VAL_0:.*]]: f32, %[[VAL_1:.*]]: i16) attributes {gpu.kernel} {
+// CHECK: %[[VAL_2:.*]] = llvm.mlir.constant(32 : i64) : i64
+// CHECK: %[[VAL_3:.*]] = llvm.alloca %[[VAL_2]] x f32 : (i64) -> !llvm.ptr
+
+// CHECK-64: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_6:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_7:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_8:.*]] = llvm.insertvalue %[[VAL_7]], %[[VAL_6]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_9:.*]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-64: %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_8]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_11]], %[[VAL_10]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32>
+
+// CHECK-32: %[[VAL_4:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_5:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_4]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_6:.*]] = llvm.insertvalue %[[VAL_3]], %[[VAL_5]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_7:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_8:.*]] = llvm.insertvalue %[[VAL_7]], %[[VAL_6]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_9:.*]] = llvm.mlir.constant(32 : index) : i32
+// CHECK-32: %[[VAL_10:.*]] = llvm.insertvalue %[[VAL_9]], %[[VAL_8]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_11:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_12:.*]] = llvm.insertvalue %[[VAL_11]], %[[VAL_10]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_13:.*]] = builtin.unrealized_conversion_cast %[[VAL_12]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32>
+
+// CHECK: %[[VAL_14:.*]] = llvm.mlir.constant(16 : i64) : i64
+// CHECK: %[[VAL_15:.*]] = llvm.alloca %[[VAL_14]] x i16 : (i64) -> !llvm.ptr
+
+// CHECK-64: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_18:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_19:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_20:.*]] = llvm.insertvalue %[[VAL_19]], %[[VAL_18]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_21:.*]] = llvm.mlir.constant(16 : index) : i64
+// CHECK-64: %[[VAL_22:.*]] = llvm.insertvalue %[[VAL_21]], %[[VAL_20]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_23:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_24:.*]] = llvm.insertvalue %[[VAL_23]], %[[VAL_22]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_25:.*]] = builtin.unrealized_conversion_cast %[[VAL_24]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16>
+
+// CHECK-32: %[[VAL_16:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_17:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_16]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_18:.*]] = llvm.insertvalue %[[VAL_15]], %[[VAL_17]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_19:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_20:.*]] = llvm.insertvalue %[[VAL_19]], %[[VAL_18]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_21:.*]] = llvm.mlir.constant(16 : index) : i32
+// CHECK-32: %[[VAL_22:.*]] = llvm.insertvalue %[[VAL_21]], %[[VAL_20]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_23:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_24:.*]] = llvm.insertvalue %[[VAL_23]], %[[VAL_22]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_25:.*]] = builtin.unrealized_conversion_cast %[[VAL_24]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16>
+
+// CHECK: %[[VAL_26:.*]] = arith.constant 0 : index
+// CHECK: memref.store %[[VAL_0]], %[[VAL_13]]{{\[}}%[[VAL_26]]] : memref<32xf32>
+// CHECK: memref.store %[[VAL_1]], %[[VAL_25]]{{\[}}%[[VAL_26]]] : memref<16xi16>
+ gpu.func @kernel_with_private_attribs(%arg0: f32, %arg1: i16)
+ private(%arg2: memref<32xf32>, %arg3: memref<16xi16>)
+ kernel {
+ %c0 = arith.constant 0 : index
+ memref.store %arg0, %arg2[%c0] : memref<32xf32>
+ memref.store %arg1, %arg3[%c0] : memref<16xi16>
+ gpu.return
+ }
+
+// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_workgroup_attribs(
+// CHECK-SAME: %[[VAL_27:.*]]: f32, %[[VAL_28:.*]]: i16, %[[VAL_29:.*]]: !llvm.ptr<3>, %[[VAL_30:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} {
+
+// CHECK-64: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_33:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_34:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_35:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_33]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_36:.*]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-64: %[[VAL_37:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_35]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_38:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_39:.*]] = llvm.insertvalue %[[VAL_38]], %[[VAL_37]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_40:.*]] = builtin.unrealized_conversion_cast %[[VAL_39]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32, 3>
+// CHECK-64: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_44:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_46:.*]] = llvm.mlir.constant(16 : index) : i64
+// CHECK-64: %[[VAL_47:.*]] = llvm.insertvalue %[[VAL_46]], %[[VAL_45]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_48:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_47]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_50:.*]] = builtin.unrealized_conversion_cast %[[VAL_49]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16, 3>
+
+// CHECK-32: %[[VAL_31:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_32:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_31]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_33:.*]] = llvm.insertvalue %[[VAL_29]], %[[VAL_32]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_34:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_35:.*]] = llvm.insertvalue %[[VAL_34]], %[[VAL_33]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_36:.*]] = llvm.mlir.constant(32 : index) : i32
+// CHECK-32: %[[VAL_37:.*]] = llvm.insertvalue %[[VAL_36]], %[[VAL_35]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_38:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_39:.*]] = llvm.insertvalue %[[VAL_38]], %[[VAL_37]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_40:.*]] = builtin.unrealized_conversion_cast %[[VAL_39]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32, 3>
+// CHECK-32: %[[VAL_41:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_42:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_41]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_43:.*]] = llvm.insertvalue %[[VAL_30]], %[[VAL_42]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_44:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_45:.*]] = llvm.insertvalue %[[VAL_44]], %[[VAL_43]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_46:.*]] = llvm.mlir.constant(16 : index) : i32
+// CHECK-32: %[[VAL_47:.*]] = llvm.insertvalue %[[VAL_46]], %[[VAL_45]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_48:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_49:.*]] = llvm.insertvalue %[[VAL_48]], %[[VAL_47]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_50:.*]] = builtin.unrealized_conversion_cast %[[VAL_49]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16, 3>
+
+// CHECK: %[[VAL_51:.*]] = arith.constant 0 : index
+// CHECK: memref.store %[[VAL_27]], %[[VAL_40]]{{\[}}%[[VAL_51]]] : memref<32xf32, 3>
+// CHECK: memref.store %[[VAL_28]], %[[VAL_50]]{{\[}}%[[VAL_51]]] : memref<16xi16, 3>
+ gpu.func @kernel_with_workgroup_attribs(%arg0: f32, %arg1: i16)
+ workgroup(%arg2: memref<32xf32, 3>, %arg3: memref<16xi16, 3>)
+ kernel {
+ %c0 = arith.constant 0 : index
+ memref.store %arg0, %arg2[%c0] : memref<32xf32, 3>
+ memref.store %arg1, %arg3[%c0] : memref<16xi16, 3>
+ gpu.return
+ }
+
+// CHECK-LABEL: llvm.func spir_kernelcc @kernel_with_both_attribs(
+// CHECK-64-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i64, %[[VAL_56:.*]]: !llvm.ptr<3>, %[[VAL_57:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} {
+// CHECK-32-SAME: %[[VAL_52:.*]]: f32, %[[VAL_53:.*]]: i16, %[[VAL_54:.*]]: i32, %[[VAL_55:.*]]: i32, %[[VAL_56:.*]]: !llvm.ptr<3>, %[[VAL_57:.*]]: !llvm.ptr<3>) attributes {gpu.kernel} {
+
+// CHECK-64: %[[VAL_58:.*]] = builtin.unrealized_conversion_cast %[[VAL_55]] : i64 to index
+// CHECK-64: %[[VAL_59:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_59]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_60]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_62:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_64:.*]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-64: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_66:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_67:.*]] = llvm.insertvalue %[[VAL_66]], %[[VAL_65]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_68:.*]] = builtin.unrealized_conversion_cast %[[VAL_67]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<32xf32, 3>
+// CHECK-64: %[[VAL_69:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_70:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_69]][0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_71:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_70]][1] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_72:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_73:.*]] = llvm.insertvalue %[[VAL_72]], %[[VAL_71]][2] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_74:.*]] = llvm.mlir.constant(16 : index) : i64
+// CHECK-64: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_74]], %[[VAL_73]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_76:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_78:.*]] = builtin.unrealized_conversion_cast %[[VAL_77]] : !llvm.struct<(ptr<3>, ptr<3>, i64, array<1 x i64>, array<1 x i64>)> to memref<16xi16, 3>
+
+// CHECK-32: %[[VAL_58:.*]] = builtin.unrealized_conversion_cast %[[VAL_55]] : i32 to index
+// CHECK-32: %[[VAL_59:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_60:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_59]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_61:.*]] = llvm.insertvalue %[[VAL_56]], %[[VAL_60]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_62:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_63:.*]] = llvm.insertvalue %[[VAL_62]], %[[VAL_61]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_64:.*]] = llvm.mlir.constant(32 : index) : i32
+// CHECK-32: %[[VAL_65:.*]] = llvm.insertvalue %[[VAL_64]], %[[VAL_63]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_66:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_67:.*]] = llvm.insertvalue %[[VAL_66]], %[[VAL_65]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_68:.*]] = builtin.unrealized_conversion_cast %[[VAL_67]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<32xf32, 3>
+// CHECK-32: %[[VAL_69:.*]] = llvm.mlir.undef : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_70:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_69]][0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_71:.*]] = llvm.insertvalue %[[VAL_57]], %[[VAL_70]][1] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_72:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_73:.*]] = llvm.insertvalue %[[VAL_72]], %[[VAL_71]][2] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_74:.*]] = llvm.mlir.constant(16 : index) : i32
+// CHECK-32: %[[VAL_75:.*]] = llvm.insertvalue %[[VAL_74]], %[[VAL_73]][3, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_76:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_77:.*]] = llvm.insertvalue %[[VAL_76]], %[[VAL_75]][4, 0] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_78:.*]] = builtin.unrealized_conversion_cast %[[VAL_77]] : !llvm.struct<(ptr<3>, ptr<3>, i32, array<1 x i32>, array<1 x i32>)> to memref<16xi16, 3>
+
+// CHECK: %[[VAL_79:.*]] = llvm.mlir.constant(32 : i64) : i64
+// CHECK: %[[VAL_80:.*]] = llvm.alloca %[[VAL_79]] x i32 : (i64) -> !llvm.ptr
+
+// CHECK-64: %[[VAL_81:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_82:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_81]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_82]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_84:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_85:.*]] = llvm.insertvalue %[[VAL_84]], %[[VAL_83]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_86:.*]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-64: %[[VAL_87:.*]] = llvm.insertvalue %[[VAL_86]], %[[VAL_85]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_88:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_89:.*]] = llvm.insertvalue %[[VAL_88]], %[[VAL_87]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_90:.*]] = builtin.unrealized_conversion_cast %[[VAL_89]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xi32>
+
+// CHECK-32: %[[VAL_81:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_82:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_81]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_83:.*]] = llvm.insertvalue %[[VAL_80]], %[[VAL_82]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_84:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_85:.*]] = llvm.insertvalue %[[VAL_84]], %[[VAL_83]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_86:.*]] = llvm.mlir.constant(32 : index) : i32
+// CHECK-32: %[[VAL_87:.*]] = llvm.insertvalue %[[VAL_86]], %[[VAL_85]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_88:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_89:.*]] = llvm.insertvalue %[[VAL_88]], %[[VAL_87]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_90:.*]] = builtin.unrealized_conversion_cast %[[VAL_89]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xi32>
+
+// CHECK: %[[VAL_91:.*]] = llvm.mlir.constant(32 : i64) : i64
+
+// CHECK-64: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i64 : (i64) -> !llvm.ptr
+// CHECK-32: %[[VAL_92:.*]] = llvm.alloca %[[VAL_91]] x i32 : (i64) -> !llvm.ptr
+
+// CHECK-64: %[[VAL_93:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_94:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_93]][0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_95:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_94]][1] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_96:.*]] = llvm.mlir.constant(0 : index) : i64
+// CHECK-64: %[[VAL_97:.*]] = llvm.insertvalue %[[VAL_96]], %[[VAL_95]][2] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_98:.*]] = llvm.mlir.constant(32 : index) : i64
+// CHECK-64: %[[VAL_99:.*]] = llvm.insertvalue %[[VAL_98]], %[[VAL_97]][3, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_100:.*]] = llvm.mlir.constant(1 : index) : i64
+// CHECK-64: %[[VAL_101:.*]] = llvm.insertvalue %[[VAL_100]], %[[VAL_99]][4, 0] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)>
+// CHECK-64: %[[VAL_102:.*]] = builtin.unrealized_conversion_cast %[[VAL_101]] : !llvm.struct<(ptr, ptr, i64, array<1 x i64>, array<1 x i64>)> to memref<32xindex>
+
+// CHECK-32: %[[VAL_93:.*]] = llvm.mlir.undef : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_94:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_93]][0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_95:.*]] = llvm.insertvalue %[[VAL_92]], %[[VAL_94]][1] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_96:.*]] = llvm.mlir.constant(0 : index) : i32
+// CHECK-32: %[[VAL_97:.*]] = llvm.insertvalue %[[VAL_96]], %[[VAL_95]][2] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_98:.*]] = llvm.mlir.constant(32 : index) : i32
+// CHECK-32: %[[VAL_99:.*]] = llvm.insertvalue %[[VAL_98]], %[[VAL_97]][3, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_100:.*]] = llvm.mlir.constant(1 : index) : i32
+// CHECK-32: %[[VAL_101:.*]] = llvm.insertvalue %[[VAL_100]], %[[VAL_99]][4, 0] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)>
+// CHECK-32: %[[VAL_102:.*]] = builtin.unrealized_conversion_cast %[[VAL_101]] : !llvm.struct<(ptr, ptr, i32, array<1 x i32>, array<1 x i32>)> to memref<32xindex>
+
+// CHECK: %[[VAL_103:.*]] = arith.constant 0 : index
+// CHECK: memref.store %[[VAL_52]], %[[VAL_68]]{{\[}}%[[VAL_103]]] : memref<32xf32, 3>
+// CHECK: memref.store %[[VAL_53]], %[[VAL_78]]{{\[}}%[[VAL_103]]] : memref<16xi16, 3>
+// CHECK: memref.store %[[VAL_54]], %[[VAL_90]]{{\[}}%[[VAL_103]]] : memref<32xi32>
+// CHECK: memref.store %[[VAL_58]], %[[VAL_102]]{{\[}}%[[VAL_103]]] : memref<32xindex>
+ gpu.func @kernel_with_both_attribs(%arg0: f32, %arg1: i16, %arg2: i32, %arg3: index)
+ workgroup(%arg4: memref<32xf32, 3>, %arg5: memref<16xi16, 3>)
+ private(%arg6: memref<32xi32>, %arg7: memref<32xindex>)
+ kernel {
+ %c0 = arith.constant 0 : index
+ memref.store %arg0, %arg4[%c0] : memref<32xf32, 3>
+ memref.store %arg1, %arg5[%c0] : memref<16xi16, 3>
+ memref.store %arg2, %arg6[%c0] : memref<32xi32>
+ memref.store %arg3, %arg7[%c0] : memref<32xindex>
+ gpu.return
+ }
+
+// CHECK-LABEL: llvm.func spir_kernelcc @kernel_known_block_size
+// CHECK-SAME: reqd_work_group_size = array<i32: 128, 128, 256>
+ gpu.func @kernel_known_block_size() kernel attributes {known_block_size = array<i32: 128, 128, 256>} {
+ gpu.return
+ }
+}
More information about the Mlir-commits
mailing list