[llvm] [mlir] [MLIR][ROCDL] Add dynamically legal ops to LowerGpuOpsToROCDLOpsPass (PR #108302)
Nirvedh Meshram via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 12 08:57:20 PDT 2024
https://github.com/nirvedhmeshram updated https://github.com/llvm/llvm-project/pull/108302
>From 4213aa70bdc45abad93c31ceaa28c652ed16b907 Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Wed, 11 Sep 2024 22:04:56 +0000
Subject: [PATCH 1/4] [MLIR][ROCDL] Add dynamically legal ops to
LowerGpuOpsToROCDLOpsPass
---
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 29926719129dc5..1265f3f48a418e 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -332,7 +332,14 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
-
+ // These ops are legal for f32 type.
+ target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>(
+ [](mlir::Operation *op) {
+ return llvm::any_of(op->getOperandTypes(), [](Type type) {
+ return llvm::isa<FloatType>(type) &&
+ type.getIntOrFloatBitWidth() == 32;
+ });
+ });
// TODO: Remove once we support replacing non-root ops.
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}
>From 8544cfce98e2f2e5946901e7e799bfa9f55dc427 Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Wed, 11 Sep 2024 22:45:42 +0000
Subject: [PATCH 2/4] Address reviewer comments
---
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 14 ++---
.../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 54 +++++++++++++++++++
2 files changed, 61 insertions(+), 7 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 1265f3f48a418e..86e4c800c63f9e 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -26,6 +26,7 @@
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/Pattern.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
+#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/MathToROCDL/MathToROCDL.h"
#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
#include "mlir/Conversion/VectorToLLVM/ConvertVectorToLLVM.h"
@@ -290,6 +291,7 @@ struct LowerGpuOpsToROCDLOpsPass
populateAMDGPUToROCDLConversionPatterns(converter, llvmPatterns,
*maybeChipset);
populateVectorToLLVMConversionPatterns(converter, llvmPatterns);
+ populateMathToLLVMConversionPatterns(converter, llvmPatterns);
cf::populateControlFlowToLLVMConversionPatterns(converter, llvmPatterns);
populateFuncToLLVMConversionPatterns(converter, llvmPatterns);
populateFinalizeMemRefToLLVMConversionPatterns(converter, llvmPatterns);
@@ -333,13 +335,11 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
// These ops are legal for f32 type.
- target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>(
- [](mlir::Operation *op) {
- return llvm::any_of(op->getOperandTypes(), [](Type type) {
- return llvm::isa<FloatType>(type) &&
- type.getIntOrFloatBitWidth() == 32;
- });
- });
+ target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
+ return any_of(op->getOperandTypes(), [](Type type) {
+ return isa<FloatType>(type) && type.getIntOrFloatBitWidth() == 32;
+ });
+ });
// TODO: Remove once we support replacing non-root ops.
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
}
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index b6fb08522ae1f3..7926948c0cf04c 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -131,6 +131,60 @@ gpu.module @test_module {
// -----
+gpu.module @test_module {
+ // CHECK-LABEL: func @gpu_sqrt
+ func.func @gpu_sqrt(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ %result32 = math.sqrt %arg_f32 : f32
+ // CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32
+ %result64 = math.sqrt %arg_f64 : f64
+ // CHECK: llvm.intr.sqrt(%{{.*}}) : (f64) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
+ // CHECK-LABEL: func @gpu_fabs
+ func.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ %result32 = math.absf %arg_f32 : f32
+ // CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32
+ %result64 = math.absf %arg_f64 : f64
+ // CHECK: llvm.intr.fabs(%{{.*}}) : (f64) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
+ // CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
+ // CHECK-LABEL: func @gpu_exp
+ func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ %result32 = math.exp %arg_f32 : f32
+ // CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32
+ %result64 = math.exp %arg_f64 : f64
+ // CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (f64) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
+
+// -----
+
+gpu.module @test_module {
+ // CHECK: llvm.func @__ocml_log_f64(f64) -> f64
+ // CHECK-LABEL: func @gpu_log
+ func.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ %result32 = math.log %arg_f32 : f32
+ // CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32
+ %result64 = math.log %arg_f64 : f64
+ // CHECK: llvm.call @__ocml_log_f64(%{{.*}}) : (f64) -> f64
+ func.return %result32, %result64 : f32, f64
+ }
+}
+
+// -----
+
gpu.module @test_module {
// CHECK: llvm.func @__ocml_cbrt_f32(f32) -> f32
// CHECK: llvm.func @__ocml_cbrt_f64(f64) -> f64
>From ff9eee95eaba0d79a141f05fb4aa7f48df2581c9 Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Thu, 12 Sep 2024 14:59:45 +0000
Subject: [PATCH 3/4] allow types narrower than f64 and update bazel
---
mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 5 +++--
utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 +
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 86e4c800c63f9e..819384a52bd878 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -334,10 +334,11 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
- // These ops are legal for f32 type.
+ // These ops are not legal for f64 type but are legal for narrower float
+ // types.
target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
return any_of(op->getOperandTypes(), [](Type type) {
- return isa<FloatType>(type) && type.getIntOrFloatBitWidth() == 32;
+ return isa<FloatType>(type) && type.getIntOrFloatBitWidth() < 64;
});
});
// TODO: Remove once we support replacing non-root ops.
diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
index c931898ed98e39..4124897722d233 100644
--- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel
@@ -6004,6 +6004,7 @@ cc_library(
":LLVMCommonConversion",
":LLVMDialect",
":MathDialect",
+ ":MathToLLVM",
":MathToROCDL",
":MemRefDialect",
":MemRefToLLVM",
>From e471b641a58ca023ff0709415fcb211602ad7d63 Mon Sep 17 00:00:00 2001
From: Nirvedh Meshram <nirvedh at gmail.com>
Date: Thu, 12 Sep 2024 15:56:53 +0000
Subject: [PATCH 4/4] address reviewer comments
---
.../GPUToROCDL/LowerGpuOpsToROCDLOps.cpp | 8 +++----
.../Conversion/GPUToROCDL/gpu-to-rocdl.mlir | 24 ++++++++++++-------
2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 819384a52bd878..fc3e1fc4f9d0c9 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -334,12 +334,10 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
target.addIllegalOp<LLVM::CosOp, LLVM::ExpOp, LLVM::Exp2Op, LLVM::FCeilOp,
LLVM::FFloorOp, LLVM::FRemOp, LLVM::LogOp, LLVM::Log10Op,
LLVM::Log2Op, LLVM::PowOp, LLVM::SinOp>();
- // These ops are not legal for f64 type but are legal for narrower float
- // types.
+ // These ops are legal for f16 and f32 type.
target.addDynamicallyLegalOp<LLVM::ExpOp, LLVM::LogOp>([](Operation *op) {
- return any_of(op->getOperandTypes(), [](Type type) {
- return isa<FloatType>(type) && type.getIntOrFloatBitWidth() < 64;
- });
+ return any_of(op->getOperandTypes(),
+ llvm::IsaPred<Float16Type, Float32Type>);
});
// TODO: Remove once we support replacing non-root ops.
target.addLegalOp<gpu::YieldOp, gpu::GPUModuleOp>();
diff --git a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
index 7926948c0cf04c..c0b62b46dcf2c1 100644
--- a/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
+++ b/mlir/test/Conversion/GPUToROCDL/gpu-to-rocdl.mlir
@@ -133,12 +133,14 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_sqrt
- func.func @gpu_sqrt(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ func.func @gpu_sqrt(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.sqrt %arg_f16 : f16
+ // CHECK: llvm.intr.sqrt(%{{.*}}) : (f16) -> f16
%result32 = math.sqrt %arg_f32 : f32
// CHECK: llvm.intr.sqrt(%{{.*}}) : (f32) -> f32
%result64 = math.sqrt %arg_f64 : f64
// CHECK: llvm.intr.sqrt(%{{.*}}) : (f64) -> f64
- func.return %result32, %result64 : f32, f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
}
}
@@ -146,12 +148,14 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK-LABEL: func @gpu_fabs
- func.func @gpu_fabs(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ func.func @gpu_fabs(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.absf %arg_f16 : f16
+ // CHECK: llvm.intr.fabs(%{{.*}}) : (f16) -> f16
%result32 = math.absf %arg_f32 : f32
// CHECK: llvm.intr.fabs(%{{.*}}) : (f32) -> f32
%result64 = math.absf %arg_f64 : f64
// CHECK: llvm.intr.fabs(%{{.*}}) : (f64) -> f64
- func.return %result32, %result64 : f32, f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
}
}
@@ -160,12 +164,14 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK: llvm.func @__ocml_exp_f64(f64) -> f64
// CHECK-LABEL: func @gpu_exp
- func.func @gpu_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ func.func @gpu_exp(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.exp %arg_f16 : f16
+ // CHECK: llvm.intr.exp(%{{.*}}) : (f16) -> f16
%result32 = math.exp %arg_f32 : f32
// CHECK: llvm.intr.exp(%{{.*}}) : (f32) -> f32
%result64 = math.exp %arg_f64 : f64
// CHECK: llvm.call @__ocml_exp_f64(%{{.*}}) : (f64) -> f64
- func.return %result32, %result64 : f32, f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
}
}
@@ -174,12 +180,14 @@ gpu.module @test_module {
gpu.module @test_module {
// CHECK: llvm.func @__ocml_log_f64(f64) -> f64
// CHECK-LABEL: func @gpu_log
- func.func @gpu_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) {
+ func.func @gpu_log(%arg_f16 : f16, %arg_f32 : f32, %arg_f64 : f64) -> (f16, f32, f64) {
+ %result16 = math.log %arg_f16 : f16
+ // CHECK: llvm.intr.log(%{{.*}}) : (f16) -> f16
%result32 = math.log %arg_f32 : f32
// CHECK: llvm.intr.log(%{{.*}}) : (f32) -> f32
%result64 = math.log %arg_f64 : f64
// CHECK: llvm.call @__ocml_log_f64(%{{.*}}) : (f64) -> f64
- func.return %result32, %result64 : f32, f64
+ func.return %result16, %result32, %result64 : f16, f32, f64
}
}
More information about the llvm-commits
mailing list