[Mlir-commits] [flang] [llvm] [mlir] [flang][mlir][OpenMP] Boost inline threshold for calls inside OpenMP SIMD loops (PR #195903)

llvmlistbot at llvm.org llvmlistbot at llvm.org
Thu May 7 14:07:06 PDT 2026


https://github.com/chichunchen updated https://github.com/llvm/llvm-project/pull/195903

>From 2c12ea53c50028375046461cb2f53e22a3ab5381 Mon Sep 17 00:00:00 2001
From: "Chi Chun, Chen" <chichun.chen at hpe.com>
Date: Tue, 14 Apr 2026 18:23:13 -0500
Subject: [PATCH 1/3] [flang][mlir][OpenMP] Boost inline threshold for calls
 inside OpenMP SIMD loops

LLVM currently has no pass that generates vector function bodies (simd clones)
for OpenMP `declare simd` functions[1]. As a result, when a scalar function is
called inside an `!$omp simd` loop, LoopVectorize cannot vectorize the loop
because the call remains scalar.

This patch adds an MLIR pass (omp-simd-inline-boost) to mark function calls inside
`omp.simd` regions. The existing LLVM inliner uses that mark to increase the inline
threshold so that LoopVectorize can widen the inlined scalar instructions naturally.

[1] https://discourse.llvm.org/t/rfc-aggressive-inlinging-for-openmp-simd-loops/90558

Assisted by Copilot.
---
 flang/lib/Optimizer/CodeGen/CodeGen.cpp       | 12 ++++
 flang/lib/Optimizer/Passes/Pipelines.cpp      |  1 +
 flang/test/Fir/simd-inline-boost-codegen.fir  | 13 +++++
 .../Integration/OpenMP/simd-inline-boost.f90  | 38 +++++++++++++
 flang/test/Lower/OpenMP/host-eval.f90         |  4 +-
 llvm/include/llvm/Analysis/InlineCost.h       |  3 +
 llvm/lib/Analysis/InlineCost.cpp              |  5 ++
 .../Inline/inline-cost-attributes.ll          |  8 ++-
 .../mlir/Dialect/OpenMP/Transforms/Passes.td  | 11 ++++
 .../Dialect/OpenMP/Transforms/CMakeLists.txt  |  1 +
 .../Transforms/OpenMPSIMDInlineBoost.cpp      | 49 ++++++++++++++++
 .../Dialect/OpenMP/simd-inline-boost.mlir     | 56 +++++++++++++++++++
 12 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 flang/test/Fir/simd-inline-boost-codegen.fir
 create mode 100644 flang/test/Integration/OpenMP/simd-inline-boost.f90
 create mode 100644 mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
 create mode 100644 mlir/test/Dialect/OpenMP/simd-inline-boost.mlir

diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 7d1068c25e7ca..c967a92d502a8 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -775,6 +775,18 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
             call.getAccessGroups())
       llvmCall.setAccessGroups(*optionalAccessGroups);
 
+    // Boost inlining of calls inside OpenMP SIMD regions.
+    if (call->hasAttr("omp.simd_inline_boost")) {
+      mlir::NamedAttrList defaultFuncAttrs;
+      if (mlir::DictionaryAttr attrs = llvmCall.getDefaultFuncAttrsAttr())
+        defaultFuncAttrs.append(attrs.begin(), attrs.end());
+      defaultFuncAttrs.set("function-inline-threshold-bonus",
+                           rewriter.getStringAttr("2000"));
+      llvmCall.setDefaultFuncAttrsAttr(
+          defaultFuncAttrs.getDictionary(rewriter.getContext()));
+      llvmCall->removeAttr("omp.simd_inline_boost");
+    }
+
     if (memAttr)
       llvmCall.setMemoryEffectsAttr(
           mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 920d6f86a355e..77c58c8237a17 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -370,6 +370,7 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
   pm.addPass(flangomp::createAutomapToTargetDataPass());
   pm.addPass(flangomp::createMapInfoFinalizationPass());
   pm.addPass(mlir::omp::createMarkDeclareTargetPass());
+  pm.addPass(mlir::omp::createOpenMPSIMDInlineBoostPass());
 
   // Delete unreachable target operations before FunctionFilteringPass
   // extracts them.
diff --git a/flang/test/Fir/simd-inline-boost-codegen.fir b/flang/test/Fir/simd-inline-boost-codegen.fir
new file mode 100644
index 0000000000000..68d9a33ffbd0a
--- /dev/null
+++ b/flang/test/Fir/simd-inline-boost-codegen.fir
@@ -0,0 +1,13 @@
+// RUN: fir-opt --fir-to-llvm-ir %s | FileCheck %s
+
+module {
+  func.func private @foo()
+
+  func.func @test_merge_default_func_attrs() {
+    // CHECK-LABEL: llvm.func @test_merge_default_func_attrs
+    // CHECK: llvm.call @foo() {default_func_attrs = {existing = "1", "function-inline-threshold-bonus" = "2000"}} : () -> ()
+    // CHECK-NOT: omp.simd_inline_boost
+    fir.call @foo() {default_func_attrs = {existing = "1"}, omp.simd_inline_boost} : () -> ()
+    return
+  }
+}
diff --git a/flang/test/Integration/OpenMP/simd-inline-boost.f90 b/flang/test/Integration/OpenMP/simd-inline-boost.f90
new file mode 100644
index 0000000000000..28000b2583bd6
--- /dev/null
+++ b/flang/test/Integration/OpenMP/simd-inline-boost.f90
@@ -0,0 +1,38 @@
+! Test that function calls inside !$omp simd loops get boosted inline thresholds.
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: define {{.*}} @test_simd_
+subroutine test_simd(x, n)
+  implicit none
+  integer, intent(in) :: n
+  real, intent(inout) :: x(n)
+  integer :: i
+  interface
+    real function foo(v)
+      real, intent(in) :: v
+    end function
+  end interface
+  !$omp simd
+  do i = 1, n
+    ! CHECK: call {{.*}}@foo_({{.*}}) #[[BOOST:[0-9]+]]
+    x(i) = foo(x(i))
+  end do
+  !$omp end simd
+end subroutine
+
+! Calls outside !$omp simd should NOT get the attribute.
+! CHECK-LABEL: define {{.*}} @no_simd_
+subroutine no_simd(x)
+  implicit none
+  real, intent(inout) :: x
+  interface
+    real function foo(v)
+      real, intent(in) :: v
+    end function
+  end interface
+  ! CHECK: call {{.*}}@foo_({{.*}})
+  ! CHECK-NOT: call {{.*}}@foo_({{.*}}) #[[BOOST]]
+  x = foo(x)
+end subroutine
+
+! CHECK: attributes #[[BOOST]] = {{{.*}}"function-inline-threshold-bonus"="2000"{{.*}}}
diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90
index 7a9c08895189d..96a6b5e00d630 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -160,7 +160,7 @@ subroutine distribute_parallel_do_simd()
   ! DEVICE-NOT: omp.parallel
   ! DEVICE-NOT: omp.distribute
   ! DEVICE-NOT: omp.wsloop
-  ! DEVICE-NOT: omp.simd
+  ! DEVICE-NOT: {{^ *}}omp.simd{{[ {]}}
   !$omp distribute parallel do simd num_threads(1)
   do i=1,10
     call foo()
@@ -269,7 +269,7 @@ subroutine distribute_simd()
   ! HOST-NEXT: omp.simd
 
   ! DEVICE-NOT: omp.distribute
-  ! DEVICE-NOT: omp.simd
+  ! DEVICE-NOT: {{^ *}}omp.simd{{[ {]}}
   !$omp distribute simd
   do i=1,10
     call foo()
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index 1faf480c590ac..4ab50b6ba75cc 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -60,6 +60,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
 const char FunctionInlineCostMultiplierAttributeName[] =
     "function-inline-cost-multiplier";
 
+const char FunctionInlineThresholdBonusAttributeName[] =
+    "function-inline-threshold-bonus";
+
 const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize";
 } // namespace InlineConstants
 
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d975a93e9b1fd..fb1163fb24d31 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1120,6 +1120,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
             getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
       Threshold = *AttrThreshold;
 
+    if (std::optional<int> AttrThresholdBonus = getStringFnAttrAsInt(
+            CandidateCall,
+            InlineConstants::FunctionInlineThresholdBonusAttributeName))
+      Threshold += *AttrThresholdBonus;
+
     if (auto Result = costBenefitAnalysis()) {
       DecidedByCostBenefit = true;
       if (*Result)
diff --git a/llvm/test/Transforms/Inline/inline-cost-attributes.ll b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
index 71264ab6c389f..49872d19a9fe5 100644
--- a/llvm/test/Transforms/Inline/inline-cost-attributes.ll
+++ b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
@@ -11,12 +11,15 @@ entry:
 
 define void @fn2() "function-inline-threshold"="41" {
 ; INLINER-LABEL: Inlining calls in: fn2
-; INLINER-NEXT: Function size: 7
+; INLINER-NEXT: Function size: 8
 ; INLINER-NEXT: NOT Inlining (cost=321, threshold=123), Call:   call void @fn1()
 ; INLINER-NEXT: NOT Inlining (cost=963, threshold=123), Call:   call void @fn1()
 ; INLINER-NEXT: NOT Inlining (cost=321, threshold=321), Call:   call void @fn1()
 ; INLINER-NEXT: NOT Inlining (cost=197, threshold=123), Call:   call void @fn1()
 ; INLINER-NEXT: Inlining (cost=197, threshold=321), Call:   call void @fn1()
+; INLINER-NEXT: Size after inlining: 7
+; INLINER-NEXT: Inlining (cost=321, threshold=523), Call:   call void @fn1()
+; INLINER-NEXT: Size after inlining: 6
 
 ; COST-LABEL: define void @fn2()
 ; COST-NEXT: entry:
@@ -32,6 +35,8 @@ define void @fn2() "function-inline-threshold"="41" {
 ; COST-NEXT: call void @fn1()
 ; COST-NEXT: cost delta = 473
 ; COST-NEXT: call void @fn1()
+; COST-NEXT: cost delta = 271
+; COST-NEXT: call void @fn1()
 
 entry:
   call void @extern()
@@ -40,6 +45,7 @@ entry:
   call void @fn1() "call-inline-cost"="0" "function-inline-threshold"="321"
   call void @fn1() "call-threshold-bonus"="17" "function-inline-cost"="197"
   call void @fn1() "call-inline-cost"="473" "function-inline-cost"="197" "function-inline-threshold"="321"
+  call void @fn1() "function-inline-threshold-bonus"="400"
   ret void
 }
 
diff --git a/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td
index e6321ef58b45f..3755c262eca41 100644
--- a/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td
@@ -52,4 +52,15 @@ def StackToSharedPass : Pass<"omp-stack-to-shared", "mlir::LLVM::LLVMFuncOp"> {
   let dependentDialects = ["mlir::omp::OpenMPDialect"];
 }
 
+def OpenMPSIMDInlineBoostPass : Pass<"omp-simd-inline-boost", "ModuleOp"> {
+  let summary = "Boost inline threshold for calls inside OpenMP SIMD loops";
+  let description = [{
+    Marks function calls inside omp.simd regions with a discardable attribute
+    (omp.simd_inline_boost) so that the FIR-to-LLVM conversion can set
+    "function-inline-threshold-bonus" on the resulting llvm.call. This enables
+    aggressive inlining of scalar function calls inside SIMD loops, allowing
+    LoopVectorize to vectorize the inlined loop body.
+  }];
+  let dependentDialects = ["mlir::omp::OpenMPDialect"];
+}
 #endif // MLIR_DIALECT_OPENMP_TRANSFORMS_PASSES
diff --git a/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt
index 569786fe95cf3..a916fe257cc98 100644
--- a/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt
@@ -2,6 +2,7 @@ add_mlir_dialect_library(MLIROpenMPTransforms
   MarkDeclareTarget.cpp
   OpenMPOffloadPrivatizationPrepare.cpp
   StackToShared.cpp
+  OpenMPSIMDInlineBoost.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenMP
diff --git a/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp b/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
new file mode 100644
index 0000000000000..f4b2dc39338ae
--- /dev/null
+++ b/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
@@ -0,0 +1,49 @@
+//===- OpenMPSIMDInlineBoost.cpp
+//-------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Mark function calls inside OpenMP SIMD regions with omp.simd_inline_boost
+// so FIR-to-LLVM conversion can add an LLVM inline-threshold bonus, enabling
+// more aggressive inlining for vectorization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace omp {
+
+#define GEN_PASS_DEF_OPENMPSIMDINLINEBOOSTPASS
+#include "mlir/Dialect/OpenMP/Transforms/Passes.h.inc"
+
+} // namespace omp
+} // namespace mlir
+
+using namespace mlir;
+namespace {
+
+class OpenMPSIMDInlineBoostPass
+    : public omp::impl::OpenMPSIMDInlineBoostPassBase<
+          OpenMPSIMDInlineBoostPass> {
+
+  void runOnOperation() override {
+    getOperation()->walk([](omp::SimdOp simdOp) {
+      simdOp->walk([](CallOpInterface callOp) {
+        Operation *op = callOp.getOperation();
+        if (op->hasAttr("omp.simd_inline_boost"))
+          return;
+        op->setAttr("omp.simd_inline_boost", UnitAttr::get(op->getContext()));
+      });
+    });
+  }
+};
+
+} // namespace
diff --git a/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir b/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
new file mode 100644
index 0000000000000..80d19a87e1378
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
@@ -0,0 +1,56 @@
+// RUN: mlir-opt -omp-simd-inline-boost %s | FileCheck %s
+
+func.func private @callee(%arg0: f32) -> f32
+
+// CHECK-LABEL: func.func @simd_with_call
+func.func @simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
+  omp.simd {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %val = memref.load %a[%iv] : memref<?xf32>
+      // CHECK: func.call @callee(%{{.*}}) {omp.simd_inline_boost} : (f32) -> f32
+      %res = func.call @callee(%val) : (f32) -> f32
+      memref.store %res, %a[%iv] : memref<?xf32>
+      omp.yield
+    }
+  }
+  return
+}
+
+// Calls outside omp.simd should NOT be modified.
+// CHECK-LABEL: func.func @no_simd
+func.func @no_simd(%v: f32) -> f32 {
+  // CHECK: call @callee(%{{.*}}) : (f32) -> f32
+  // CHECK-NOT: omp.simd_inline_boost
+  %res = func.call @callee(%v) : (f32) -> f32
+  return %res : f32
+}
+
+// Composite wsloop+simd: calls inside omp.simd should be boosted.
+// CHECK-LABEL: func.func @wsloop_simd_with_call
+func.func @wsloop_simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
+  omp.wsloop {
+    omp.simd {
+      omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+        %val = memref.load %a[%iv] : memref<?xf32>
+        // CHECK: func.call @callee(%{{.*}}) {omp.simd_inline_boost} : (f32) -> f32
+        %res = func.call @callee(%val) : (f32) -> f32
+        memref.store %res, %a[%iv] : memref<?xf32>
+        omp.yield
+      }
+    } {omp.composite}
+  } {omp.composite}
+  return
+}
+
+// Calls already marked should not be re-marked (idempotent).
+// CHECK-LABEL: func.func @already_marked
+func.func @already_marked(%lb: index, %ub: index, %step: index, %v: f32) {
+  omp.simd {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      // CHECK: func.call @callee(%{{.*}}) {omp.simd_inline_boost} : (f32) -> f32
+      %res = func.call @callee(%v) {omp.simd_inline_boost} : (f32) -> f32
+      omp.yield
+    }
+  }
+  return
+}

>From ec0d6c9d70ec338672626f66dd6e54d19011d3b5 Mon Sep 17 00:00:00 2001
From: "Chi Chun, Chen" <chichun.chen at hpe.com>
Date: Wed, 6 May 2026 12:40:13 -0500
Subject: [PATCH 2/3] Guard SIMD inline boost behind flag and limit to declare
 simd callees

- Add -openmp-simd-inline-boost cl::opt flag (hidden, default off) to gate
  the pass. The pass only runs when both -fopenmp and this flag are set.
- Only boost calls to functions that contain an omp.declare_simd op,
  rather than all calls inside omp.simd regions.
- The host-eval.f90 test change is no longer needed because the pass is not
  enabled by default. That change was made because the new attribute
  (omp.simd_inline_boost) shares the `omp.simd` prefix matched by the
  DEVICE-NOT checks.
- Tests updated accordingly
---
 flang/lib/Optimizer/Passes/Pipelines.cpp      | 10 +++++-
 .../Integration/OpenMP/simd-inline-boost.f90  | 27 ++++++++------
 flang/test/Lower/OpenMP/host-eval.f90         |  4 +--
 .../Transforms/OpenMPSIMDInlineBoost.cpp      | 36 ++++++++++++++++---
 .../Dialect/OpenMP/simd-inline-boost.mlir     | 29 +++++++++++++--
 5 files changed, 85 insertions(+), 21 deletions(-)

diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 77c58c8237a17..0c3160c1bcc14 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -25,6 +25,13 @@ static llvm::cl::opt<bool> disableArgumentFakeUse("disable-argument-fake-use",
                                                   llvm::cl::Hidden,
                                                   llvm::cl::init(false));
 
+/// Enable boosting inline threshold for calls inside OpenMP SIMD regions.
+static llvm::cl::opt<bool> enableOpenMPSIMDInlineBoost(
+    "openmp-simd-inline-boost", llvm::cl::Hidden,
+    llvm::cl::desc("Enable experimental inline-threshold boost for calls to "
+                   "declare-simd functions inside OpenMP SIMD loops"),
+    llvm::cl::init(false));
+
 namespace fir {
 
 template <typename F>
@@ -370,7 +377,8 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
   pm.addPass(flangomp::createAutomapToTargetDataPass());
   pm.addPass(flangomp::createMapInfoFinalizationPass());
   pm.addPass(mlir::omp::createMarkDeclareTargetPass());
-  pm.addPass(mlir::omp::createOpenMPSIMDInlineBoostPass());
+  if (enableOpenMPSIMDInlineBoost)
+    pm.addPass(mlir::omp::createOpenMPSIMDInlineBoostPass());
 
   // Delete unreachable target operations before FunctionFilteringPass
   // extracts them.
diff --git a/flang/test/Integration/OpenMP/simd-inline-boost.f90 b/flang/test/Integration/OpenMP/simd-inline-boost.f90
index 28000b2583bd6..e9bc8ded29ac5 100644
--- a/flang/test/Integration/OpenMP/simd-inline-boost.f90
+++ b/flang/test/Integration/OpenMP/simd-inline-boost.f90
@@ -1,7 +1,14 @@
-! Test that function calls inside !$omp simd loops get boosted inline thresholds.
-!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+! Test that function calls to declare simd functions inside !$omp simd loops
+! get the omp.simd_inline_boost attribute when -openmp-simd-inline-boost is set.
+!RUN: %flang_fc1 -emit-mlir -fopenmp -mmlir -openmp-simd-inline-boost %s -o - | FileCheck %s
 
-! CHECK-LABEL: define {{.*}} @test_simd_
+real function foo(v)
+  !$omp declare simd
+  real, intent(in) :: v
+  foo = v * v
+end function
+
+! CHECK-LABEL: func.func @_QPtest_simd
 subroutine test_simd(x, n)
   implicit none
   integer, intent(in) :: n
@@ -9,30 +16,30 @@ subroutine test_simd(x, n)
   integer :: i
   interface
     real function foo(v)
+      !$omp declare simd
       real, intent(in) :: v
     end function
   end interface
   !$omp simd
   do i = 1, n
-    ! CHECK: call {{.*}}@foo_({{.*}}) #[[BOOST:[0-9]+]]
+    ! CHECK: fir.call @_QPfoo({{.*}}) {{.*}}omp.simd_inline_boost
     x(i) = foo(x(i))
   end do
   !$omp end simd
 end subroutine
 
-! Calls outside !$omp simd should NOT get the attribute.
-! CHECK-LABEL: define {{.*}} @no_simd_
+! Calls to declare simd functions outside !$omp simd should NOT get the attribute.
+! CHECK-LABEL: func.func @_QPno_simd
 subroutine no_simd(x)
   implicit none
   real, intent(inout) :: x
   interface
     real function foo(v)
+      !$omp declare simd
       real, intent(in) :: v
     end function
   end interface
-  ! CHECK: call {{.*}}@foo_({{.*}})
-  ! CHECK-NOT: call {{.*}}@foo_({{.*}}) #[[BOOST]]
+  ! CHECK: fir.call @_QPfoo({{.*}})
+  ! CHECK-NOT: omp.simd_inline_boost
   x = foo(x)
 end subroutine
-
-! CHECK: attributes #[[BOOST]] = {{{.*}}"function-inline-threshold-bonus"="2000"{{.*}}}
diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90
index 96a6b5e00d630..7a9c08895189d 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -160,7 +160,7 @@ subroutine distribute_parallel_do_simd()
   ! DEVICE-NOT: omp.parallel
   ! DEVICE-NOT: omp.distribute
   ! DEVICE-NOT: omp.wsloop
-  ! DEVICE-NOT: {{^ *}}omp.simd{{[ {]}}
+  ! DEVICE-NOT: omp.simd
   !$omp distribute parallel do simd num_threads(1)
   do i=1,10
     call foo()
@@ -269,7 +269,7 @@ subroutine distribute_simd()
   ! HOST-NEXT: omp.simd
 
   ! DEVICE-NOT: omp.distribute
-  ! DEVICE-NOT: {{^ *}}omp.simd{{[ {]}}
+  ! DEVICE-NOT: omp.simd
   !$omp distribute simd
   do i=1,10
     call foo()
diff --git a/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp b/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
index f4b2dc39338ae..34a6523ccbd72 100644
--- a/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
+++ b/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
@@ -7,14 +7,16 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// Mark function calls inside OpenMP SIMD regions with omp.simd_inline_boost
-// so FIR-to-LLVM conversion can add an LLVM inline-threshold bonus, enabling
-// more aggressive inlining for vectorization.
+// Mark calls inside OpenMP SIMD regions with `omp.simd_inline_boost` so that
+// FIR-to-LLVM conversion can attach an LLVM inline-threshold bonus to calls to
+// functions containing `omp.declare_simd`, making them more likely to be
+// inlined for vectorization.
 //
 //===----------------------------------------------------------------------===//
 
 #include "mlir/Dialect/OpenMP/OpenMPDialect.h"
 #include "mlir/IR/Operation.h"
+#include "mlir/IR/SymbolTable.h"
 #include "mlir/Interfaces/CallInterfaces.h"
 #include "mlir/Pass/Pass.h"
 
@@ -30,16 +32,40 @@ namespace omp {
 using namespace mlir;
 namespace {
 
+static bool calleeHasDeclareSimd(CallOpInterface callOp,
+                                 SymbolTable &symTable) {
+  auto callableRef = callOp.getCallableForCallee();
+  if (!callableRef)
+    return false;
+  auto symRef = dyn_cast<SymbolRefAttr>(callableRef);
+  if (!symRef)
+    return false;
+  auto *callee = symTable.lookup(symRef.getRootReference());
+  if (!callee)
+    return false;
+  bool found = false;
+  callee->walk([&](omp::DeclareSimdOp) {
+    found = true;
+    return WalkResult::interrupt();
+  });
+  return found;
+}
+
 class OpenMPSIMDInlineBoostPass
     : public omp::impl::OpenMPSIMDInlineBoostPassBase<
           OpenMPSIMDInlineBoostPass> {
 
   void runOnOperation() override {
-    getOperation()->walk([](omp::SimdOp simdOp) {
-      simdOp->walk([](CallOpInterface callOp) {
+    ModuleOp module = getOperation();
+    SymbolTable symTable(module);
+
+    module->walk([&](omp::SimdOp simdOp) {
+      simdOp->walk([&](CallOpInterface callOp) {
         Operation *op = callOp.getOperation();
         if (op->hasAttr("omp.simd_inline_boost"))
           return;
+        if (!calleeHasDeclareSimd(callOp, symTable))
+          return;
         op->setAttr("omp.simd_inline_boost", UnitAttr::get(op->getContext()));
       });
     });
diff --git a/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir b/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
index 80d19a87e1378..8da9735f2d753 100644
--- a/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
+++ b/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
@@ -1,6 +1,13 @@
 // RUN: mlir-opt -omp-simd-inline-boost %s | FileCheck %s
 
-func.func private @callee(%arg0: f32) -> f32
+func.func @callee(%arg0: f32) -> f32 {
+  omp.declare_simd
+  return %arg0 : f32
+}
+
+func.func @no_simd_callee(%arg0: f32) -> f32 {
+  return %arg0 : f32
+}
 
 // CHECK-LABEL: func.func @simd_with_call
 func.func @simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
@@ -16,6 +23,22 @@ func.func @simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32
   return
 }
 
+// Calls to functions without declare simd should NOT be boosted.
+// CHECK-LABEL: func.func @simd_without_declare_simd
+func.func @simd_without_declare_simd(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
+  omp.simd {
+    omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+      %val = memref.load %a[%iv] : memref<?xf32>
+      // CHECK: func.call @no_simd_callee(%{{.*}}) : (f32) -> f32
+      // CHECK-NOT: omp.simd_inline_boost
+      %res = func.call @no_simd_callee(%val) : (f32) -> f32
+      memref.store %res, %a[%iv] : memref<?xf32>
+      omp.yield
+    }
+  }
+  return
+}
+
 // Calls outside omp.simd should NOT be modified.
 // CHECK-LABEL: func.func @no_simd
 func.func @no_simd(%v: f32) -> f32 {
@@ -25,7 +48,7 @@ func.func @no_simd(%v: f32) -> f32 {
   return %res : f32
 }
 
-// Composite wsloop+simd: calls inside omp.simd should be boosted.
+// Calls to declare simd functions should be boosted.
 // CHECK-LABEL: func.func @wsloop_simd_with_call
 func.func @wsloop_simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
   omp.wsloop {
@@ -42,7 +65,7 @@ func.func @wsloop_simd_with_call(%lb: index, %ub: index, %step: index, %a: memre
   return
 }
 
-// Calls already marked should not be re-marked (idempotent).
+// Calls already marked should not be re-marked.
 // CHECK-LABEL: func.func @already_marked
 func.func @already_marked(%lb: index, %ub: index, %step: index, %v: f32) {
   omp.simd {

>From cf649d9f5cdd1e8c6f5eb9362548d21f1916a1a5 Mon Sep 17 00:00:00 2001
From: "Chi Chun, Chen" <chichun.chen at hpe.com>
Date: Wed, 6 May 2026 12:31:41 -0500
Subject: [PATCH 3/3] Apply threshold bonus before `shouldStop`

function-inline-threshold-bonus was previously applied during
finalizeAnalysis(), after the callee walk completed. In normal inline-cost
mode, shouldStop() can stop analysis as soon as Cost reaches Threshold, so a
call whose cost exceeds the unboosted threshold can be rejected before the
bonus is considered.

Apply the bonus in onAnalysisStart() so the early bailout uses the boosted
threshold. If finalizeAnalysis() later sees an explicit
function-inline-threshold override, re-apply the bonus after that override
because the override resets Threshold. Otherwise, keep the bonus applied only
once.

Add a regression test for a call with function-inline-threshold-bonus but no
explicit function-inline-threshold, to verify that the bonus is not counted twice.
---
 llvm/lib/Analysis/InlineCost.cpp              | 19 ++++++++++++++-----
 .../Inline/inline-cost-attributes.ll          | 15 +++++++++++++++
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index fb1163fb24d31..8a1d3ba2c139b 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1117,13 +1117,17 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
       Cost *= *AttrCostMult;
 
     if (std::optional<int> AttrThreshold =
-            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
+            getStringFnAttrAsInt(CandidateCall, "function-inline-threshold")) {
       Threshold = *AttrThreshold;
 
-    if (std::optional<int> AttrThresholdBonus = getStringFnAttrAsInt(
-            CandidateCall,
-            InlineConstants::FunctionInlineThresholdBonusAttributeName))
-      Threshold += *AttrThresholdBonus;
+      // The threshold bonus was already applied in onAnalysisStart() so that
+      // shouldStop() observes it. Re-apply it only when the explicit threshold
+      // override above resets Threshold.
+      if (std::optional<int> AttrThresholdBonus = getStringFnAttrAsInt(
+              CandidateCall,
+              InlineConstants::FunctionInlineThresholdBonusAttributeName))
+        Threshold += *AttrThresholdBonus;
+    }
 
     if (auto Result = costBenefitAnalysis()) {
       DecidedByCostBenefit = true;
@@ -1186,6 +1190,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
     // the rest of the function body.
     Threshold += (SingleBBBonus + VectorBonus);
 
+    if (std::optional<int> AttrThresholdBonus = getStringFnAttrAsInt(
+            CandidateCall,
+            InlineConstants::FunctionInlineThresholdBonusAttributeName))
+      Threshold += *AttrThresholdBonus;
+
     // Give out bonuses for the callsite, as the instructions setting them up
     // will be gone after inlining.
     addCost(-getCallsiteCost(TTI, this->CandidateCall, DL));
diff --git a/llvm/test/Transforms/Inline/inline-cost-attributes.ll b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
index 49872d19a9fe5..480d19f775d23 100644
--- a/llvm/test/Transforms/Inline/inline-cost-attributes.ll
+++ b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
@@ -61,3 +61,18 @@ entry:
   call void @fn2()
   ret void
 }
+
+define void @fn_no_threshold() {
+entry:
+  ret void
+}
+
+define void @fn4() {
+; INLINER-LABEL: Inlining calls in: fn4
+; INLINER-NEXT: Function size: 2
+; INLINER-NEXT: NOT Inlining (cost=900, threshold=737), Call:   call void @fn_no_threshold()
+
+entry:
+  call void @fn_no_threshold() "function-inline-cost"="900" "function-inline-threshold-bonus"="400"
+  ret void
+}



More information about the Mlir-commits mailing list