[Mlir-commits] [mlir] [mlir][AMDGPU] Add support for AMD f16 math library calls (PR #108809)

Mon Sep 16 13:28:23 PDT 2024

github-actions[bot] wrote:




:warning: The C/C++ code formatter, clang-format, found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 9b237b4013e86741cf7d4fcd95a7bc70772d2286 1812c25be457d7ead1dd024f62cf83d437696472 --extensions cpp,h -- mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff

diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
index 8a9414d32e..1cc02d6929 100644
--- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
+++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h
@@ -17,11 +17,12 @@
 namespace mlir {
 
 /// Rewriting that replace SourceOp with a CallOp to `f32Func` or `f64Func` or
-/// `f32ApproxFunc` or `f16Func` depending on the element type and the fastMathFlag of that
-/// Op. The function declaration is added in case it was not added before.
+/// `f32ApproxFunc` or `f16Func` depending on the element type and the
+/// fastMathFlag of that Op. The function declaration is added in case it was
+/// not added before.
 ///
-/// If the input values are of unsupported type, the value is first casted to f32, the
-/// function called and then the result casted back.
+/// If the input values are of unsupported type, the value is first casted to
+/// f32, the function called and then the result casted back.
 ///
 /// Example with NVVM:
 ///   %exp_f32 = math.exp %arg_f32 : f32
@@ -41,7 +42,8 @@ template <typename SourceOp>
 struct OpToFuncCallLowering : public ConvertOpToLLVMPattern<SourceOp> {
 public:
   explicit OpToFuncCallLowering(LLVMTypeConverter &lowering, StringRef f32Func,
-                                StringRef f64Func, StringRef f32ApproxFunc, StringRef f16Func)
+                                StringRef f64Func, StringRef f32ApproxFunc,
+                                StringRef f16Func)
       : ConvertOpToLLVMPattern<SourceOp>(lowering), f32Func(f32Func),
         f64Func(f64Func), f32ApproxFunc(f32ApproxFunc), f16Func(f16Func) {}
 
diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index f5650c35c3..4f9c4ab59d 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -335,8 +335,8 @@ void mlir::configureGpuToNVVMConversionLegality(ConversionTarget &target) {
 template <typename OpTy>
 static void populateOpPatterns(LLVMTypeConverter &converter,
                                RewritePatternSet &patterns, StringRef f32Func,
-                               StringRef f64Func,
-                               StringRef f32ApproxFunc = "", StringRef f16Func = "") {
+                               StringRef f64Func, StringRef f32ApproxFunc = "",
+                               StringRef f16Func = "") {
   patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
   patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func,
                                            f32ApproxFunc, f16Func);
diff --git a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
index 6b9e6b1192..f297cbcaa1 100644
--- a/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
+++ b/mlir/lib/Conversion/GPUToROCDL/LowerGpuOpsToROCDLOps.cpp
@@ -346,9 +346,11 @@ void mlir::configureGpuToROCDLConversionLegality(ConversionTarget &target) {
 template <typename OpTy>
 static void populateOpPatterns(LLVMTypeConverter &converter,
                                RewritePatternSet &patterns, StringRef f32Func,
-                               StringRef f64Func, StringRef f32ApproxFunc, StringRef f16Func) {
+                               StringRef f64Func, StringRef f32ApproxFunc,
+                               StringRef f16Func) {
   patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
-  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f32ApproxFunc, f16Func);
+  patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f32ApproxFunc,
+                                           f16Func);
 }
 
 void mlir::populateGpuToROCDLConversionPatterns(
diff --git a/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp b/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
index 1611a8835c..8330713ea6 100644
--- a/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
+++ b/mlir/lib/Conversion/MathToROCDL/MathToROCDL.cpp
@@ -38,8 +38,7 @@ using namespace mlir;
 template <typename OpTy>
 static void populateOpPatterns(LLVMTypeConverter &converter,
                                RewritePatternSet &patterns, StringRef f32Func,
-                               StringRef f64Func,
-                               StringRef f16Func,
+                               StringRef f64Func, StringRef f16Func,
                                StringRef f32ApproxFunc = "") {
   patterns.add<ScalarizeVectorOpLowering<OpTy>>(converter);
   patterns.add<OpToFuncCallLowering<OpTy>>(converter, f32Func, f64Func,
@@ -87,16 +86,16 @@ void mlir::populateMathToROCDLConversionPatterns(LLVMTypeConverter &converter,
                                    "__ocml_cosh_f64", "__ocml_cosh_f16");
   populateOpPatterns<math::SinhOp>(converter, patterns, "__ocml_sinh_f32",
                                    "__ocml_sinh_f64", "__ocml_sinh_f16");
-  populateOpPatterns<math::ExpOp>(converter, patterns, "",
-                                  "__ocml_exp_f64", "__ocml_exp_f16");
+  populateOpPatterns<math::ExpOp>(converter, patterns, "", "__ocml_exp_f64",
+                                  "__ocml_exp_f16");
   populateOpPatterns<math::Exp2Op>(converter, patterns, "__ocml_exp2_f32",
                                    "__ocml_exp2_f64", "__ocml_exp2_f16");
   populateOpPatterns<math::ExpM1Op>(converter, patterns, "__ocml_expm1_f32",
                                     "__ocml_expm1_f64", "__ocml_expm1_f16");
   populateOpPatterns<math::FloorOp>(converter, patterns, "__ocml_floor_f32",
                                     "__ocml_floor_f64", "__ocml_floor_f16");
-  populateOpPatterns<math::LogOp>(converter, patterns, "",
-                                  "__ocml_log_f64", "__ocml_log_f16");
+  populateOpPatterns<math::LogOp>(converter, patterns, "", "__ocml_log_f64",
+                                  "__ocml_log_f16");
   populateOpPatterns<math::Log10Op>(converter, patterns, "__ocml_log10_f32",
                                     "__ocml_log10_f64", "__ocml_log10_f16");
   populateOpPatterns<math::Log1pOp>(converter, patterns, "__ocml_log1p_f32",

``````````

</details>


https://github.com/llvm/llvm-project/pull/108809