[Mlir-commits] [mlir] 888717e - [mlir][transform] Enable gpu-to-nvvm via conversion patterns driven by TD
Nicolas Vasilache
llvmlistbot at llvm.org
Thu Aug 10 08:30:56 PDT 2023
Author: Nicolas Vasilache
Date: 2023-08-10T15:30:48Z
New Revision: 888717e85395c22dcc70a7d16ae8459e70df822c
URL: https://github.com/llvm/llvm-project/commit/888717e85395c22dcc70a7d16ae8459e70df822c
DIFF: https://github.com/llvm/llvm-project/commit/888717e85395c22dcc70a7d16ae8459e70df822c.diff
LOG: [mlir][transform] Enable gpu-to-nvvm via conversion patterns driven by TD
This revision untangles a few more conversion pieces and allows rewriting the
relatively intricate (and somewhat inconsistent) LowerGpuOpsToNVVMOpsPass in a
declarative fashion, providing much better understanding and control.
Differential Revision: https://reviews.llvm.org/D157617
Added:
mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-32b.mlir
Modified:
mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
Removed:
################################################################################
diff --git a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
index 288456d76fd031..143b68e43b6aa0 100644
--- a/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
+++ b/mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h
@@ -30,10 +30,12 @@ struct LogicalResult;
class ModuleOp;
class Operation;
class RewritePatternSet;
+class TypeConverter;
class Pass;
namespace gpu {
+enum class AddressSpace : uint32_t;
class GPUModuleOp;
} // namespace gpu
@@ -69,6 +71,13 @@ void populateGpuToLLVMConversionPatterns(LLVMTypeConverter &converter,
StringRef gpuBinaryAnnotation = {},
bool kernelBarePtrCallConv = false);
+/// A function that maps a MemorySpace enum to a target-specific integer value.
+using MemorySpaceMapping = std::function<unsigned(gpu::AddressSpace)>;
+
+/// Populates memory space attribute conversion rules for lowering
+/// gpu.address_space to integer values.
+void populateGpuMemorySpaceAttributeConversions(
+ TypeConverter &typeConverter, const MemorySpaceMapping &mapping);
} // namespace mlir
#endif // MLIR_CONVERSION_GPUCOMMON_GPUCOMMONPASS_H_
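(For context, a minimal sketch of a call site for the relocated helper; this is
illustrative only and not part of the commit. It assumes an LLVMTypeConverter
named "converter" is already in scope.)

  // Hypothetical call site: map gpu.address_space values to NVVM integer
  // address spaces before populating the conversion patterns.
  populateGpuMemorySpaceAttributeConversions(
      converter, [](gpu::AddressSpace space) -> unsigned {
        switch (space) {
        case gpu::AddressSpace::Global:
          return 1; // NVVM global memory.
        case gpu::AddressSpace::Workgroup:
          return 3; // NVVM shared memory.
        case gpu::AddressSpace::Private:
          return 0; // Private memory lowers to the default address space.
        }
        llvm_unreachable("unknown gpu::AddressSpace");
      });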
diff --git a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
index 7f25bf7d980c12..3a58c7ccd0997d 100644
--- a/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/GPU/TransformOps/GPUTransformOps.td
@@ -14,6 +14,61 @@ include "mlir/Dialect/Transform/IR/TransformInterfaces.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
include "mlir/IR/OpBase.td"
+//===----------------------------------------------------------------------===//
+// Apply...ConversionPatternsOp
+//===----------------------------------------------------------------------===//
+
+def ApplyGPUToNVVMConversionPatternsOp : Op<Transform_Dialect,
+ "apply_conversion_patterns.gpu.gpu_to_nvvm",
+ [DeclareOpInterfaceMethods<ConversionPatternDescriptorOpInterface,
+ ["verifyTypeConverter"]>]> {
+ let description = [{
+ Collects patterns that convert GPU dialect ops to NVVM dialect ops. These
+ patterns require an "LLVMTypeConverter".
+ }];
+ let assemblyFormat = "attr-dict";
+}
+
+def ApplyGPUWwmaToNVVMConversionPatternsOp : Op<Transform_Dialect,
+ "apply_conversion_patterns.gpu.gpu_wmma_to_nvvm",
+ [DeclareOpInterfaceMethods<ConversionPatternDescriptorOpInterface,
+ ["verifyTypeConverter"]>]> {
+ let description = [{
+ Collects patterns that convert GPU dialect ops related to wmma ops
+ to NVVM dialect ops.
+ These patterns require an "LLVMTypeConverter".
+ }];
+ let assemblyFormat = "attr-dict";
+}
+
+def ApplyGPUSubgroupReduceToNVVMConversionPatternsOp : Op<Transform_Dialect,
+ "apply_conversion_patterns.gpu.gpu_subgroup_reduce_to_nvvm",
+ [DeclareOpInterfaceMethods<ConversionPatternDescriptorOpInterface,
+ ["verifyTypeConverter"]>]> {
+ let description = [{
+    Collects patterns that convert GPU dialect ops related to subgroup
+    reduce ops to NVVM dialect ops.
+ These patterns require an "LLVMTypeConverter".
+ }];
+ let assemblyFormat = "attr-dict";
+}
+
+//===----------------------------------------------------------------------===//
+// Apply...PatternsOp
+//===----------------------------------------------------------------------===//
+
+def ApplyGPURewritePatternsOp : Op<Transform_Dialect,
+ "apply_patterns.gpu.gpu_rewrite_patterns",
+ [DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
+ let description = [{
+ Collects GPU rewrite patterns comprising:
+ 1. GpuAllReduceRewrite patterns
+ 2. GpuGlobalIdRewriter patterns
+ 3. GpuShuffleRewriter patterns
+ }];
+ let assemblyFormat = "attr-dict";
+}
+
def ApplyUnrollVectorsSubgroupMmaOp : Op<Transform_Dialect,
"apply_patterns.gpu.unroll_vectors_subgroup_mma",
[DeclareOpInterfaceMethods<PatternDescriptorOpInterface>]> {
diff --git a/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td b/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
index 1cafef13b007c9..bce84cb3fdea08 100644
--- a/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
+++ b/mlir/include/mlir/Dialect/NVGPU/TransformOps/NVGPUTransformOps.td
@@ -16,7 +16,7 @@ include "mlir/Dialect/Transform/IR/TransformTypes.td"
include "mlir/Interfaces/SideEffectInterfaces.td"
//===----------------------------------------------------------------------===//
-// ApplyNVGPUToNVVMConversionPatternsOp
+// Apply...ConversionPatternsOp
//===----------------------------------------------------------------------===//
def ApplyNVGPUToNVVMConversionPatternsOp : Op<Transform_Dialect,
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
index 6e8ba7a33fd8f4..f7caf025fb79bd 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp
@@ -7,6 +7,8 @@
//===----------------------------------------------------------------------===//
#include "GPUOpsLowering.h"
+
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
diff --git a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
index 1895c65d99964f..d61f22c9fc37df 100644
--- a/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h
@@ -111,15 +111,6 @@ struct ScalarizeVectorOpLowering : public ConvertOpToLLVMPattern<SourceOp> {
*this->getTypeConverter());
}
};
-
-/// A function that maps a MemorySpace enum to a target-specific integer value.
-using MemorySpaceMapping =
- std::function<unsigned(gpu::AddressSpace gpuAddressSpace)>;
-
-/// Populates memory space attribute conversion rules for lowering
-/// gpu.address_space to integer values.
-void populateGpuMemorySpaceAttributeConversions(
- TypeConverter &typeConverter, const MemorySpaceMapping &mapping);
} // namespace mlir
#endif // MLIR_CONVERSION_GPUCOMMON_GPUOPSLOWERING_H_
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index 669277b746234e..fbc36a65178d9d 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -16,6 +16,7 @@
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index ed40b773b3c4bf..a30f8a2e20ad77 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -17,6 +17,7 @@
#include "mlir/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/LLVMCommon/ConversionTarget.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
@@ -24,13 +25,13 @@
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlow.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/Pass/Pass.h"
diff --git a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
index 08f099ab66e064..b26788f675ce54 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
+++ b/mlir/lib/Dialect/GPU/TransformOps/CMakeLists.txt
@@ -20,4 +20,8 @@ add_mlir_dialect_library(MLIRGPUTransformOps
MLIRTransformDialect
MLIRVectorDialect
MLIRVectorTransforms
+
+ # ConversionPatterns
+ MLIRNVGPUToNVVM
+ MLIRGPUToNVVMTransforms
)
diff --git a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
index 6b3246d116dc4e..aaf337f36a6081 100644
--- a/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
+++ b/mlir/lib/Dialect/GPU/TransformOps/GPUTransformOps.cpp
@@ -8,11 +8,16 @@
#include "mlir/Dialect/GPU/TransformOps/GPUTransformOps.h"
+#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
+#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
+#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/GPU/TransformOps/Utils.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
@@ -29,6 +34,7 @@
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Visitors.h"
#include "mlir/Support/LLVM.h"
+#include "mlir/Transforms/DialectConversion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/TypeSwitch.h"
@@ -47,6 +53,85 @@ using namespace mlir::transform::gpu;
#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
#define DBGS_ALIAS() (llvm::dbgs() << '[' << DEBUG_TYPE_ALIAS << "] ")
+//===----------------------------------------------------------------------===//
+// Apply...ConversionPatternsOp
+//===----------------------------------------------------------------------===//
+
+void transform::ApplyGPUToNVVMConversionPatternsOp::populatePatterns(
+ TypeConverter &typeConverter, RewritePatternSet &patterns) {
+ auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter);
+ // NVVM uses alloca in the default address space to represent private
+ // memory allocations, so drop private annotations. NVVM uses address
+  // space 3 for shared memory and address space 1 for global memory.
+  // Used in populateGpuToNVVMConversionPatterns, so attaching here for now.
+ // TODO: We should have a single to_nvvm_type_converter.
+ populateGpuMemorySpaceAttributeConversions(
+ llvmTypeConverter, [](AddressSpace space) -> unsigned {
+ switch (space) {
+ case AddressSpace::Global:
+ return static_cast<unsigned>(
+ NVVM::NVVMMemorySpace::kGlobalMemorySpace);
+ case AddressSpace::Workgroup:
+ return static_cast<unsigned>(
+ NVVM::NVVMMemorySpace::kSharedMemorySpace);
+ case AddressSpace::Private:
+ return 0;
+ }
+ llvm_unreachable("unknown address space enum value");
+ return 0;
+ });
+ // Used in GPUToNVVM/WmmaOpsToNvvm.cpp so attaching here for now.
+ // TODO: We should have a single to_nvvm_type_converter.
+ llvmTypeConverter.addConversion(
+ [&](MMAMatrixType type) -> Type { return convertMMAToLLVMType(type); });
+ populateGpuToNVVMConversionPatterns(llvmTypeConverter, patterns);
+}
+
+LogicalResult
+transform::ApplyGPUToNVVMConversionPatternsOp::verifyTypeConverter(
+ transform::TypeConverterBuilderOpInterface builder) {
+ if (builder.getTypeConverterType() != "LLVMTypeConverter")
+ return emitOpError("expected LLVMTypeConverter");
+ return success();
+}
+
+void transform::ApplyGPUWwmaToNVVMConversionPatternsOp::populatePatterns(
+ TypeConverter &typeConverter, RewritePatternSet &patterns) {
+ auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter);
+ populateGpuWMMAToNVVMConversionPatterns(llvmTypeConverter, patterns);
+}
+
+LogicalResult
+transform::ApplyGPUWwmaToNVVMConversionPatternsOp::verifyTypeConverter(
+ transform::TypeConverterBuilderOpInterface builder) {
+ if (builder.getTypeConverterType() != "LLVMTypeConverter")
+ return emitOpError("expected LLVMTypeConverter");
+ return success();
+}
+
+void transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp::
+ populatePatterns(TypeConverter &typeConverter,
+ RewritePatternSet &patterns) {
+ auto &llvmTypeConverter = static_cast<LLVMTypeConverter &>(typeConverter);
+ populateGpuSubgroupReduceOpLoweringPattern(llvmTypeConverter, patterns);
+}
+
+LogicalResult transform::ApplyGPUSubgroupReduceToNVVMConversionPatternsOp::
+ verifyTypeConverter(transform::TypeConverterBuilderOpInterface builder) {
+ if (builder.getTypeConverterType() != "LLVMTypeConverter")
+ return emitOpError("expected LLVMTypeConverter");
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// Apply...PatternsOp
+//===----------------------------------------------------------------------===//
+
+void ApplyGPURewritePatternsOp::populatePatterns(RewritePatternSet &patterns) {
+ populateGpuRewritePatterns(patterns);
+}
+
//===----------------------------------------------------------------------===//
// ApplyUnrollVectorsSubgroupMmaOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
index ca9f2ac254c587..acf4f6d0e3d697 100644
--- a/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
+++ b/mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp
@@ -387,8 +387,8 @@ struct GpuAllReduceRewriter {
static constexpr int kSubgroupSize = 32;
};
-struct GpuAllReduceConversion : public RewritePattern {
- explicit GpuAllReduceConversion(MLIRContext *context)
+struct GpuAllReduceRewrite : public RewritePattern {
+ explicit GpuAllReduceRewrite(MLIRContext *context)
: RewritePattern(gpu::GPUFuncOp::getOperationName(), 1, context) {}
LogicalResult matchAndRewrite(Operation *op,
@@ -417,5 +417,5 @@ struct GpuAllReduceConversion : public RewritePattern {
} // namespace
void mlir::populateGpuAllReducePatterns(RewritePatternSet &patterns) {
- patterns.add<GpuAllReduceConversion>(patterns.getContext());
+ patterns.add<GpuAllReduceRewrite>(patterns.getContext());
}
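(As a side note, a minimal sketch of driving the renamed pattern with the
greedy rewriter rather than the transform interpreter; illustrative only,
assuming it runs inside a pass with an operation "op" at hand.)

  // Hypothetical pass body: collect the all-reduce rewrite patterns
  // (now named GpuAllReduceRewrite) and apply them greedily.
  RewritePatternSet patterns(op->getContext());
  populateGpuAllReducePatterns(patterns);
  if (failed(applyPatternsAndFoldGreedily(op, std::move(patterns))))
    signalPassFailure(); // Assumes a Pass context.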
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-32b.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-32b.mlir
new file mode 100644
index 00000000000000..62f61629ea9d03
--- /dev/null
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm-32b.mlir
@@ -0,0 +1,77 @@
+// RUN: mlir-opt %s -convert-gpu-to-nvvm='index-bitwidth=32 use-opaque-pointers=1' -split-input-file | FileCheck %s
+
+// RUN: mlir-opt %s -test-transform-dialect-interpreter | FileCheck %s
+
+gpu.module @test_module_0 {
+ // CHECK-LABEL: func @gpu_index_ops()
+ func.func @gpu_index_ops()
+ -> (index, index, index, index, index, index,
+ index, index, index, index, index, index,
+ index) {
+ %tIdX = gpu.thread_id x
+ %tIdY = gpu.thread_id y
+ %tIdZ = gpu.thread_id z
+
+ %bDimX = gpu.block_dim x
+ %bDimY = gpu.block_dim y
+ %bDimZ = gpu.block_dim z
+
+ %bIdX = gpu.block_id x
+ %bIdY = gpu.block_id y
+ %bIdZ = gpu.block_id z
+
+ %gDimX = gpu.grid_dim x
+ %gDimY = gpu.grid_dim y
+ %gDimZ = gpu.grid_dim z
+
+ // CHECK-NOT: = llvm.sext %{{.*}} : i32 to i64
+ %laneId = gpu.lane_id
+
+ func.return %tIdX, %tIdY, %tIdZ, %bDimX, %bDimY, %bDimZ,
+ %bIdX, %bIdY, %bIdZ, %gDimX, %gDimY, %gDimZ,
+ %laneId
+ : index, index, index, index, index, index,
+ index, index, index, index, index, index,
+ index
+ }
+}
+
+
+
+gpu.module @test_module_1 {
+ // CHECK-LABEL: func @gpu_index_comp
+ func.func @gpu_index_comp(%idx : index) -> index {
+ // CHECK: = llvm.add %{{.*}}, %{{.*}} : i32
+ %0 = arith.addi %idx, %idx : index
+ // CHECK: llvm.return %{{.*}} : i32
+ func.return %0 : index
+ }
+}
+
+transform.sequence failures(propagate) {
+^bb1(%toplevel_module: !transform.any_op):
+ %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module
+ : (!transform.any_op) -> !transform.any_op
+ transform.apply_conversion_patterns to %gpu_module {
+ transform.apply_conversion_patterns.dialect_to_llvm "arith"
+ transform.apply_conversion_patterns.dialect_to_llvm "cf"
+ transform.apply_conversion_patterns.vector.vector_to_llvm
+ transform.apply_conversion_patterns.func.func_to_llvm
+ transform.apply_conversion_patterns.dialect_to_llvm "memref"
+ transform.apply_conversion_patterns.gpu.gpu_to_nvvm
+ transform.apply_conversion_patterns.gpu.gpu_wmma_to_nvvm
+ transform.apply_conversion_patterns.gpu.gpu_subgroup_reduce_to_nvvm {has_redux = true}
+ transform.apply_conversion_patterns.nvgpu.nvgpu_to_nvvm
+ } with type_converter {
+ transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
+ {index_bitwidth = 32, use_opaque_pointers = true}
+ } {
+ legal_dialects = ["llvm", "memref", "nvvm"],
+ legal_ops = ["func.func", "gpu.module", "gpu.module_end", "gpu.yield"],
+ illegal_dialects = ["gpu"],
+ illegal_ops = ["llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil",
+ "llvm.ffloor", "llvm.log", "llvm.log10", "llvm.log2", "llvm.pow",
+ "llvm.sin", "llvm.sqrt"],
+ partial_conversion
+ } : !transform.any_op
+}
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
index 56c322dd94592b..d0e68998ff2977 100644
--- a/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-nvvm.mlir
@@ -1,14 +1,12 @@
// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 use-opaque-pointers=1' -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -convert-gpu-to-nvvm='has-redux=1 index-bitwidth=32 use-opaque-pointers=1' -split-input-file | FileCheck --check-prefix=CHECK32 %s
+// RUN: mlir-opt %s -test-transform-dialect-interpreter | FileCheck %s
-gpu.module @test_module {
+gpu.module @test_module_0 {
// CHECK-LABEL: func @gpu_index_ops()
- // CHECK32-LABEL: func @gpu_index_ops()
func.func @gpu_index_ops()
-> (index, index, index, index, index, index,
index, index, index, index, index, index,
index) {
- // CHECK32-NOT: = llvm.sext %{{.*}} : i32 to i64
// CHECK: = nvvm.read.ptx.sreg.tid.x : i32
// CHECK: = llvm.sext %{{.*}} : i32 to i64
@@ -64,24 +62,21 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_1 {
// CHECK-LABEL: func @gpu_index_comp
- // CHECK32-LABEL: func @gpu_index_comp
func.func @gpu_index_comp(%idx : index) -> index {
// CHECK: = llvm.add %{{.*}}, %{{.*}} : i64
- // CHECK32: = llvm.add %{{.*}}, %{{.*}} : i32
%0 = arith.addi %idx, %idx : index
// CHECK: llvm.return %{{.*}} : i64
- // CHECK32: llvm.return %{{.*}} : i32
func.return %0 : index
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_2 {
// CHECK-LABEL: func @gpu_all_reduce_op()
gpu.func @gpu_all_reduce_op() {
%arg0 = arith.constant 1.0 : f32
@@ -95,9 +90,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_3 {
// CHECK-LABEL: func @gpu_all_reduce_region()
gpu.func @gpu_all_reduce_region() {
%arg0 = arith.constant 1 : i32
@@ -113,9 +108,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_4 {
// CHECK-LABEL: func @gpu_shuffle()
func.func @gpu_shuffle() -> (f32, f32, f32, f32) {
// CHECK: %[[#VALUE:]] = llvm.mlir.constant(1.000000e+00 : f32) : f32
@@ -152,9 +147,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_5 {
// CHECK-LABEL: func @gpu_sync()
func.func @gpu_sync() {
// CHECK: nvvm.barrier0
@@ -163,9 +158,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_6 {
// CHECK: llvm.func @__nv_fabsf(f32) -> f32
// CHECK: llvm.func @__nv_fabs(f64) -> f64
// CHECK-LABEL: func @gpu_fabs
@@ -178,9 +173,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_7 {
// CHECK: llvm.func @__nv_cbrtf(f32) -> f32
// CHECK: llvm.func @__nv_cbrt(f64) -> f64
// CHECK-LABEL: func @gpu_cbrt
@@ -193,9 +188,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_8 {
// CHECK: llvm.func @__nv_ceilf(f32) -> f32
// CHECK: llvm.func @__nv_ceil(f64) -> f64
// CHECK-LABEL: func @gpu_ceil
@@ -208,9 +203,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_9 {
// CHECK: llvm.func @__nv_floorf(f32) -> f32
// CHECK: llvm.func @__nv_floor(f64) -> f64
// CHECK-LABEL: func @gpu_floor
@@ -223,9 +218,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_10 {
// CHECK: llvm.func @__nv_cosf(f32) -> f32
// CHECK: llvm.func @__nv_cos(f64) -> f64
// CHECK-LABEL: func @gpu_cos
@@ -238,8 +233,8 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_11 {
// CHECK: llvm.func @__nv_expf(f32) -> f32
// CHECK: llvm.func @__nv_exp(f64) -> f64
// CHECK-LABEL: func @gpu_exp
@@ -252,8 +247,8 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_12 {
// CHECK: llvm.func @__nv_exp2f(f32) -> f32
// CHECK: llvm.func @__nv_exp2(f64) -> f64
// CHECK-LABEL: func @gpu_exp2
@@ -266,9 +261,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_13 {
// CHECK: llvm.func @__nv_logf(f32) -> f32
// CHECK: llvm.func @__nv_log(f64) -> f64
// CHECK-LABEL: func @gpu_log
@@ -281,9 +276,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_14 {
// CHECK: llvm.func @__nv_log10f(f32) -> f32
// CHECK: llvm.func @__nv_log10(f64) -> f64
// CHECK-LABEL: func @gpu_log10
@@ -296,9 +291,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_15 {
// CHECK: llvm.func @__nv_log1pf(f32) -> f32
// CHECK: llvm.func @__nv_log1p(f64) -> f64
// CHECK-LABEL: func @gpu_log1p
@@ -311,9 +306,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_16 {
// CHECK: llvm.func @__nv_log2f(f32) -> f32
// CHECK: llvm.func @__nv_log2(f64) -> f64
// CHECK-LABEL: func @gpu_log2
@@ -326,9 +321,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_17 {
// CHECK: llvm.func @__nv_sinf(f32) -> f32
// CHECK: llvm.func @__nv_sin(f64) -> f64
// CHECK-LABEL: func @gpu_sin
@@ -341,9 +336,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_18 {
// CHECK: llvm.func @__nv_tanf(f32) -> f32
// CHECK: llvm.func @__nv_tan(f64) -> f64
// CHECK-LABEL: func @gpu_tan
@@ -360,9 +355,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_19 {
// CHECK: llvm.func @__nv_tanhf(f32) -> f32
// CHECK: llvm.func @__nv_tanh(f64) -> f64
// CHECK-LABEL: func @gpu_tanh
@@ -379,9 +374,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_20 {
// CHECK: llvm.func @__nv_rsqrtf(f32) -> f32
// CHECK: llvm.func @__nv_rsqrt(f64) -> f64
// CHECK-LABEL: func @gpu_rsqrt
@@ -399,9 +394,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_21 {
// CHECK: llvm.func @__nv_sqrtf(f32) -> f32
// CHECK: llvm.func @__nv_sqrt(f64) -> f64
// CHECK-LABEL: func @gpu_sqrt
@@ -419,9 +414,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_22 {
// CHECK: llvm.func @__nv_atanf(f32) -> f32
// CHECK: llvm.func @__nv_atan(f64) -> f64
// CHECK-LABEL: func @gpu_atan
@@ -439,9 +434,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_23 {
// CHECK: llvm.func @__nv_atan2f(f32, f32) -> f32
// CHECK: llvm.func @__nv_atan2(f64, f64) -> f64
// CHECK-LABEL: func @gpu_atan2
@@ -460,10 +455,10 @@ gpu.module @test_module {
}
}
-// -----
+
// Test that we properly handle operations with a SymbolTable other than the module op
-gpu.module @test_module {
+gpu.module @test_module_24 {
"test.symbol_scope"() ({
// CHECK: test.symbol_scope
// CHECK: llvm.func @__nv_expf(f32) -> f32
@@ -480,9 +475,9 @@ gpu.module @test_module {
}) : () -> ()
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_25 {
// CHECK: llvm.func @__nv_expm1f(f32) -> f32
// CHECK: llvm.func @__nv_expm1(f64) -> f64
// CHECK-LABEL: func @gpu_expm1
@@ -495,9 +490,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_26 {
// CHECK: llvm.func @__nv_powf(f32, f32) -> f32
// CHECK: llvm.func @__nv_pow(f64, f64) -> f64
// CHECK-LABEL: func @gpu_pow
@@ -510,9 +505,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_27 {
// CHECK-LABEL: func @gpu_unroll
func.func @gpu_unroll(%arg0 : vector<4xf32>) -> vector<4xf32> {
%result = math.exp %arg0 : vector<4xf32>
@@ -530,9 +525,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_28 {
// CHECK-LABEL: @kernel_func
// CHECK: attributes
// CHECK: gpu.kernel
@@ -542,9 +537,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_29 {
// CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL0:[A-Za-z0-9_]+]]("Hello, world\0A\00")
// CHECK-DAG: llvm.mlir.global internal constant @[[$PRINT_GLOBAL1:[A-Za-z0-9_]+]]("Hello: %d\0A\00")
// CHECK-DAG: llvm.func @vprintf(!llvm.ptr<i8>, !llvm.ptr<i8>) -> i32
@@ -580,9 +575,9 @@ gpu.module @test_module {
}
}
-// -----
-gpu.module @test_module {
+
+gpu.module @test_module_30 {
// CHECK-LABEL: func @subgroup_reduce_add
gpu.func @subgroup_reduce_add(%arg0 : i32) {
// CHECK: nvvm.redux.sync add {{.*}}
@@ -621,3 +616,38 @@ gpu.module @test_module {
}
}
+transform.sequence failures(propagate) {
+^bb1(%toplevel_module: !transform.any_op):
+ %gpu_module = transform.structured.match ops{["gpu.module"]} in %toplevel_module
+ : (!transform.any_op) -> !transform.any_op
+
+ transform.apply_patterns to %gpu_module {
+ transform.apply_patterns.gpu.gpu_rewrite_patterns
+ } : !transform.any_op
+
+ transform.apply_conversion_patterns to %gpu_module {
+ transform.apply_conversion_patterns.dialect_to_llvm "arith"
+ transform.apply_conversion_patterns.dialect_to_llvm "cf"
+ transform.apply_conversion_patterns.vector.vector_to_llvm
+ transform.apply_conversion_patterns.func.func_to_llvm
+ transform.apply_conversion_patterns.dialect_to_llvm "memref"
+ transform.apply_conversion_patterns.gpu.gpu_to_nvvm
+ transform.apply_conversion_patterns.gpu.gpu_wmma_to_nvvm
+ transform.apply_conversion_patterns.gpu.gpu_subgroup_reduce_to_nvvm
+ transform.apply_conversion_patterns.nvgpu.nvgpu_to_nvvm
+ } with type_converter {
+ transform.apply_conversion_patterns.memref.memref_to_llvm_type_converter
+ {index_bitwidth = 64,
+ use_bare_ptr = true,
+ use_bare_ptr_memref_call_conv = true,
+ use_opaque_pointers = true}
+ } {
+ legal_dialects = ["llvm", "memref", "nvvm", "test"],
+ legal_ops = ["func.func", "gpu.module", "gpu.module_end", "gpu.yield"],
+ illegal_dialects = ["gpu"],
+ illegal_ops = ["llvm.cos", "llvm.exp", "llvm.exp2", "llvm.fabs", "llvm.fceil",
+ "llvm.ffloor", "llvm.log", "llvm.log10", "llvm.log2","llvm.pow",
+ "llvm.sin", "llvm.sqrt"],
+ partial_conversion
+ } : !transform.any_op
+}