[Mlir-commits] [mlir] [mlir] enable fallback to generic LLVM lowering for math dialect in convert-gpu-to-nvvm pass (PR #165728)

Thu Oct 30 07:48:23 PDT 2025

https://github.com/yangtetris created https://github.com/llvm/llvm-project/pull/165728

### Summary
This PR makes the `convert-gpu-to-nvvm` pass produce a more complete LLVM conversion: math dialect operations that are not supported by libdevice now fall back to the generic LLVM lowering patterns instead of being left unconverted. With this change, there is no need to run a separate `convert-math-to-llvm` pass after the `convert-gpu-to-nvvm` pass.
Since [[mlir][GPUToNVVM] Add benefit to populate functions](https://github.com/llvm/llvm-project/pull/128484), we no longer need to skip the math dialect in order to prioritize gpu-to-nvvm patterns over generic LLVM patterns. In fact, arith operations such as `arith.remf` and `arith.maxnumf` already rely on pattern benefits to control priority.
### Example
```
// Input Module
gpu.module {
  func.func @math_abs(%arg0: i16) -> i16 {
    %res = math.absi %arg0 : i16
    return %res : i16
  }
}

// Before the change: unconverted
gpu.module {
  func.func @math_abs(%arg0: i16) -> i16 {
    %res = math.absi %arg0 : i16
    return %res : i16
  }
}

// After the change:
gpu.module {
  func.func @math_abs(%arg0: i16) -> i16 {
    %res = "llvm.intr.abs"(%arg0) <{is_int_min_poison = false}> : (i16) -> i16
    return %res : i16
  }
}
```

>From 79e28d83be55211c9b1a218cb77bc8563415e27c Mon Sep 17 00:00:00 2001
From: Yang Bai <yangb at nvidia.com>
Date: Thu, 30 Oct 2025 07:23:55 -0700
Subject: [PATCH] enable fallback to generic LLVM lowering for math dialect in
 convert-gpu-to-nvvm pass

---
 .../GPUToNVVM/LowerGpuOpsToNVVMOps.cpp        |  6 ++--
 .../GPUToNVVM/gpu-to-generic-llvm.mlir        | 29 +++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 mlir/test/Conversion/GPUToNVVM/gpu-to-generic-llvm.mlir

diff --git a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
index d64c4d64cad84..70c97b3566662 100644
--- a/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
+++ b/mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp
@@ -383,16 +383,14 @@ struct LowerGpuOpsToNVVMOpsPass final
     LLVMConversionTarget target(getContext());
 
     // Set higher benefit, so patterns will run before generic LLVM lowering.
+    // Make sure the benefit here is higher than ArithToLLVMDialectInterface and
+    // MathToLLVMDialectInterface.
     populateGpuToNVVMConversionPatterns(converter, llvmPatterns,
                                         /*benefit=*/10);
 
     llvm::SmallDenseSet<StringRef> allowedDialectsSet(allowedDialects.begin(),
                                                       allowedDialects.end());
     for (Dialect *dialect : getContext().getLoadedDialects()) {
-      // Skip math patterns as nvvm needs custom math lowering.
-      if (isa<math::MathDialect>(dialect))
-        continue;
-
       bool allowed = allowedDialectsSet.contains(dialect->getNamespace());
       // Empty `allowedDialectsSet` means all dialects are allowed.
       if (!allowedDialectsSet.empty() && !allowed)
diff --git a/mlir/test/Conversion/GPUToNVVM/gpu-to-generic-llvm.mlir b/mlir/test/Conversion/GPUToNVVM/gpu-to-generic-llvm.mlir
new file mode 100644
index 0000000000000..5be7938aae8ef
--- /dev/null
+++ b/mlir/test/Conversion/GPUToNVVM/gpu-to-generic-llvm.mlir
@@ -0,0 +1,29 @@
+// RUN: mlir-opt %s -convert-gpu-to-nvvm -split-input-file | FileCheck %s
+
+/// Math/arith ops that are not supported by libdevice
+/// should be converted by generic LLVM lowering patterns.
+
+gpu.module @generic_llvm_test_module_0 {
+  // CHECK-LABEL: @arith_add
+  func.func @arith_add(%left: i64, %right: i64) -> i64 {
+    // CHECK: llvm.add {{.*}}, {{.*}} : i64
+    %result = arith.addi %left, %right : i64
+    return %result : i64
+  }
+}
+
+gpu.module @generic_llvm_test_module_1 {
+  // CHECK-LABEL: @math_abs_non_i32
+  func.func @math_abs_non_i32(%arg_i64: i64, %arg_i16: i16, %arg_i8: i8, %arg_i1: i1) 
+      -> (i64, i16, i8, i1) {
+    // CHECK: "llvm.intr.abs"{{.*}} : (i64) -> i64
+    %abs_i64 = math.absi %arg_i64 : i64
+    // CHECK: "llvm.intr.abs"{{.*}} : (i16) -> i16
+    %abs_i16 = math.absi %arg_i16 : i16
+    // CHECK: "llvm.intr.abs"{{.*}} : (i8) -> i8
+    %abs_i8 = math.absi %arg_i8 : i8
+    // CHECK: "llvm.intr.abs"{{.*}} : (i1) -> i1
+    %abs_i1 = math.absi %arg_i1 : i1
+    return %abs_i64, %abs_i16, %abs_i8, %abs_i1 : i64, i16, i8, i1
+  }
+}