[flang-commits] [flang] [llvm] [mlir] [flang][mlir][OpenMP] Boost inline threshold for calls inside OpenMP SIMD loops (PR #195903)
via flang-commits
flang-commits at lists.llvm.org
Tue May 5 13:57:02 PDT 2026
https://github.com/chichunchen updated https://github.com/llvm/llvm-project/pull/195903
>From 2c12ea53c50028375046461cb2f53e22a3ab5381 Mon Sep 17 00:00:00 2001
From: "Chi Chun, Chen" <chichun.chen at hpe.com>
Date: Tue, 14 Apr 2026 18:23:13 -0500
Subject: [PATCH] [flang][mlir][OpenMP] Boost inline threshold for calls inside
OpenMP SIMD loops
LLVM currently has no pass that generates vector function bodies (simd clones)
for OpenMP `declare simd` functions[1]. As a result, when a scalar function is
called inside an `!$omp simd` loop, LoopVectorize cannot vectorize the loop
because the call remains scalar.
This patch adds an MLIR pass (omp-simd-inline-boost) that marks function calls inside
`omp.simd` regions. The existing LLVM inliner uses that mark to increase the inline
threshold so that LoopVectorize can widen the inlined scalar instructions naturally.
[1] https://discourse.llvm.org/t/rfc-aggressive-inlinging-for-openmp-simd-loops/90558
Assisted by Copilot.
---
flang/lib/Optimizer/CodeGen/CodeGen.cpp | 12 ++++
flang/lib/Optimizer/Passes/Pipelines.cpp | 1 +
flang/test/Fir/simd-inline-boost-codegen.fir | 13 +++++
.../Integration/OpenMP/simd-inline-boost.f90 | 38 +++++++++++++
flang/test/Lower/OpenMP/host-eval.f90 | 4 +-
llvm/include/llvm/Analysis/InlineCost.h | 3 +
llvm/lib/Analysis/InlineCost.cpp | 5 ++
.../Inline/inline-cost-attributes.ll | 8 ++-
.../mlir/Dialect/OpenMP/Transforms/Passes.td | 11 ++++
.../Dialect/OpenMP/Transforms/CMakeLists.txt | 1 +
.../Transforms/OpenMPSIMDInlineBoost.cpp | 49 ++++++++++++++++
.../Dialect/OpenMP/simd-inline-boost.mlir | 56 +++++++++++++++++++
12 files changed, 198 insertions(+), 3 deletions(-)
create mode 100644 flang/test/Fir/simd-inline-boost-codegen.fir
create mode 100644 flang/test/Integration/OpenMP/simd-inline-boost.f90
create mode 100644 mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
create mode 100644 mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 7d1068c25e7ca..c967a92d502a8 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -775,6 +775,18 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
call.getAccessGroups())
llvmCall.setAccessGroups(*optionalAccessGroups);
+ // Boost inlining of calls inside OpenMP SIMD regions.
+ if (call->hasAttr("omp.simd_inline_boost")) {
+ mlir::NamedAttrList defaultFuncAttrs;
+ if (mlir::DictionaryAttr attrs = llvmCall.getDefaultFuncAttrsAttr())
+ defaultFuncAttrs.append(attrs.begin(), attrs.end());
+ defaultFuncAttrs.set("function-inline-threshold-bonus",
+ rewriter.getStringAttr("2000"));
+ llvmCall.setDefaultFuncAttrsAttr(
+ defaultFuncAttrs.getDictionary(rewriter.getContext()));
+ llvmCall->removeAttr("omp.simd_inline_boost");
+ }
+
if (memAttr)
llvmCall.setMemoryEffectsAttr(
mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 920d6f86a355e..77c58c8237a17 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -370,6 +370,7 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
pm.addPass(flangomp::createAutomapToTargetDataPass());
pm.addPass(flangomp::createMapInfoFinalizationPass());
pm.addPass(mlir::omp::createMarkDeclareTargetPass());
+ pm.addPass(mlir::omp::createOpenMPSIMDInlineBoostPass());
// Delete unreachable target operations before FunctionFilteringPass
// extracts them.
diff --git a/flang/test/Fir/simd-inline-boost-codegen.fir b/flang/test/Fir/simd-inline-boost-codegen.fir
new file mode 100644
index 0000000000000..68d9a33ffbd0a
--- /dev/null
+++ b/flang/test/Fir/simd-inline-boost-codegen.fir
@@ -0,0 +1,13 @@
+// RUN: fir-opt --fir-to-llvm-ir %s | FileCheck %s
+
+module {
+ func.func private @foo()
+
+ func.func @test_merge_default_func_attrs() {
+ // CHECK-LABEL: llvm.func @test_merge_default_func_attrs
+ // CHECK: llvm.call @foo() {default_func_attrs = {existing = "1", "function-inline-threshold-bonus" = "2000"}} : () -> ()
+ // CHECK-NOT: omp.simd_inline_boost
+ fir.call @foo() {default_func_attrs = {existing = "1"}, omp.simd_inline_boost} : () -> ()
+ return
+ }
+}
diff --git a/flang/test/Integration/OpenMP/simd-inline-boost.f90 b/flang/test/Integration/OpenMP/simd-inline-boost.f90
new file mode 100644
index 0000000000000..28000b2583bd6
--- /dev/null
+++ b/flang/test/Integration/OpenMP/simd-inline-boost.f90
@@ -0,0 +1,38 @@
+! Test that function calls inside !$omp simd loops get boosted inline thresholds.
+!RUN: %flang_fc1 -emit-llvm -fopenmp %s -o - | FileCheck %s
+
+! CHECK-LABEL: define {{.*}} @test_simd_
+subroutine test_simd(x, n)
+ implicit none
+ integer, intent(in) :: n
+ real, intent(inout) :: x(n)
+ integer :: i
+ interface
+ real function foo(v)
+ real, intent(in) :: v
+ end function
+ end interface
+ !$omp simd
+ do i = 1, n
+ ! CHECK: call {{.*}}@foo_({{.*}}) #[[BOOST:[0-9]+]]
+ x(i) = foo(x(i))
+ end do
+ !$omp end simd
+end subroutine
+
+! Calls outside !$omp simd should NOT get the attribute.
+! CHECK-LABEL: define {{.*}} @no_simd_
+subroutine no_simd(x)
+ implicit none
+ real, intent(inout) :: x
+ interface
+ real function foo(v)
+ real, intent(in) :: v
+ end function
+ end interface
+ ! CHECK: call {{.*}}@foo_({{.*}})
+ ! CHECK-NOT: call {{.*}}@foo_({{.*}}) #[[BOOST]]
+ x = foo(x)
+end subroutine
+
+! CHECK: attributes #[[BOOST]] = {{{.*}}"function-inline-threshold-bonus"="2000"{{.*}}}
diff --git a/flang/test/Lower/OpenMP/host-eval.f90 b/flang/test/Lower/OpenMP/host-eval.f90
index 7a9c08895189d..96a6b5e00d630 100644
--- a/flang/test/Lower/OpenMP/host-eval.f90
+++ b/flang/test/Lower/OpenMP/host-eval.f90
@@ -160,7 +160,7 @@ subroutine distribute_parallel_do_simd()
! DEVICE-NOT: omp.parallel
! DEVICE-NOT: omp.distribute
! DEVICE-NOT: omp.wsloop
- ! DEVICE-NOT: omp.simd
+ ! DEVICE-NOT: {{^ *}}omp.simd{{[ {]}}
!$omp distribute parallel do simd num_threads(1)
do i=1,10
call foo()
@@ -269,7 +269,7 @@ subroutine distribute_simd()
! HOST-NEXT: omp.simd
! DEVICE-NOT: omp.distribute
- ! DEVICE-NOT: omp.simd
+ ! DEVICE-NOT: {{^ *}}omp.simd{{[ {]}}
!$omp distribute simd
do i=1,10
call foo()
diff --git a/llvm/include/llvm/Analysis/InlineCost.h b/llvm/include/llvm/Analysis/InlineCost.h
index 1faf480c590ac..4ab50b6ba75cc 100644
--- a/llvm/include/llvm/Analysis/InlineCost.h
+++ b/llvm/include/llvm/Analysis/InlineCost.h
@@ -60,6 +60,9 @@ const uint64_t MaxSimplifiedDynamicAllocaToInline = 65536;
const char FunctionInlineCostMultiplierAttributeName[] =
"function-inline-cost-multiplier";
+const char FunctionInlineThresholdBonusAttributeName[] =
+ "function-inline-threshold-bonus";
+
const char MaxInlineStackSizeAttributeName[] = "inline-max-stacksize";
} // namespace InlineConstants
diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp
index d975a93e9b1fd..fb1163fb24d31 100644
--- a/llvm/lib/Analysis/InlineCost.cpp
+++ b/llvm/lib/Analysis/InlineCost.cpp
@@ -1120,6 +1120,11 @@ class InlineCostCallAnalyzer final : public CallAnalyzer {
getStringFnAttrAsInt(CandidateCall, "function-inline-threshold"))
Threshold = *AttrThreshold;
+ if (std::optional<int> AttrThresholdBonus = getStringFnAttrAsInt(
+ CandidateCall,
+ InlineConstants::FunctionInlineThresholdBonusAttributeName))
+ Threshold += *AttrThresholdBonus;
+
if (auto Result = costBenefitAnalysis()) {
DecidedByCostBenefit = true;
if (*Result)
diff --git a/llvm/test/Transforms/Inline/inline-cost-attributes.ll b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
index 71264ab6c389f..49872d19a9fe5 100644
--- a/llvm/test/Transforms/Inline/inline-cost-attributes.ll
+++ b/llvm/test/Transforms/Inline/inline-cost-attributes.ll
@@ -11,12 +11,15 @@ entry:
define void @fn2() "function-inline-threshold"="41" {
; INLINER-LABEL: Inlining calls in: fn2
-; INLINER-NEXT: Function size: 7
+; INLINER-NEXT: Function size: 8
; INLINER-NEXT: NOT Inlining (cost=321, threshold=123), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=963, threshold=123), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=321, threshold=321), Call: call void @fn1()
; INLINER-NEXT: NOT Inlining (cost=197, threshold=123), Call: call void @fn1()
; INLINER-NEXT: Inlining (cost=197, threshold=321), Call: call void @fn1()
+; INLINER-NEXT: Size after inlining: 7
+; INLINER-NEXT: Inlining (cost=321, threshold=523), Call: call void @fn1()
+; INLINER-NEXT: Size after inlining: 6
; COST-LABEL: define void @fn2()
; COST-NEXT: entry:
@@ -32,6 +35,8 @@ define void @fn2() "function-inline-threshold"="41" {
; COST-NEXT: call void @fn1()
; COST-NEXT: cost delta = 473
; COST-NEXT: call void @fn1()
+; COST-NEXT: cost delta = 271
+; COST-NEXT: call void @fn1()
entry:
call void @extern()
@@ -40,6 +45,7 @@ entry:
call void @fn1() "call-inline-cost"="0" "function-inline-threshold"="321"
call void @fn1() "call-threshold-bonus"="17" "function-inline-cost"="197"
call void @fn1() "call-inline-cost"="473" "function-inline-cost"="197" "function-inline-threshold"="321"
+ call void @fn1() "function-inline-threshold-bonus"="400"
ret void
}
diff --git a/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td
index e6321ef58b45f..3755c262eca41 100644
--- a/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td
+++ b/mlir/include/mlir/Dialect/OpenMP/Transforms/Passes.td
@@ -52,4 +52,15 @@ def StackToSharedPass : Pass<"omp-stack-to-shared", "mlir::LLVM::LLVMFuncOp"> {
let dependentDialects = ["mlir::omp::OpenMPDialect"];
}
+def OpenMPSIMDInlineBoostPass : Pass<"omp-simd-inline-boost", "ModuleOp"> {
+ let summary = "Boost inline threshold for calls inside OpenMP SIMD loops";
+ let description = [{
+ Marks function calls inside omp.simd regions with a discardable attribute
+ (omp.simd_inline_boost) so that the FIR-to-LLVM conversion can set
+ "function-inline-threshold-bonus" on the resulting llvm.call. This enables
+ aggressive inlining of scalar function calls inside SIMD loops, allowing
+ LoopVectorize to vectorize the inlined loop body.
+ }];
+ let dependentDialects = ["mlir::omp::OpenMPDialect"];
+}
#endif // MLIR_DIALECT_OPENMP_TRANSFORMS_PASSES
diff --git a/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt
index 569786fe95cf3..a916fe257cc98 100644
--- a/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt
+++ b/mlir/lib/Dialect/OpenMP/Transforms/CMakeLists.txt
@@ -2,6 +2,7 @@ add_mlir_dialect_library(MLIROpenMPTransforms
MarkDeclareTarget.cpp
OpenMPOffloadPrivatizationPrepare.cpp
StackToShared.cpp
+ OpenMPSIMDInlineBoost.cpp
ADDITIONAL_HEADER_DIRS
${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/OpenMP
diff --git a/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp b/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
new file mode 100644
index 0000000000000..f4b2dc39338ae
--- /dev/null
+++ b/mlir/lib/Dialect/OpenMP/Transforms/OpenMPSIMDInlineBoost.cpp
@@ -0,0 +1,49 @@
+//===- OpenMPSIMDInlineBoost.cpp
+//-------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Mark function calls inside OpenMP SIMD regions with omp.simd_inline_boost
+// so FIR-to-LLVM conversion can add an LLVM inline-threshold bonus, enabling
+// more aggressive inlining for vectorization.
+//
+//===----------------------------------------------------------------------===//
+
+#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
+#include "mlir/IR/Operation.h"
+#include "mlir/Interfaces/CallInterfaces.h"
+#include "mlir/Pass/Pass.h"
+
+namespace mlir {
+namespace omp {
+
+#define GEN_PASS_DEF_OPENMPSIMDINLINEBOOSTPASS
+#include "mlir/Dialect/OpenMP/Transforms/Passes.h.inc"
+
+} // namespace omp
+} // namespace mlir
+
+using namespace mlir;
+namespace {
+
+class OpenMPSIMDInlineBoostPass
+ : public omp::impl::OpenMPSIMDInlineBoostPassBase<
+ OpenMPSIMDInlineBoostPass> {
+
+ void runOnOperation() override {
+ getOperation()->walk([](omp::SimdOp simdOp) {
+ simdOp->walk([](CallOpInterface callOp) {
+ Operation *op = callOp.getOperation();
+ if (op->hasAttr("omp.simd_inline_boost"))
+ return;
+ op->setAttr("omp.simd_inline_boost", UnitAttr::get(op->getContext()));
+ });
+ });
+ }
+};
+
+} // namespace
diff --git a/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir b/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
new file mode 100644
index 0000000000000..80d19a87e1378
--- /dev/null
+++ b/mlir/test/Dialect/OpenMP/simd-inline-boost.mlir
@@ -0,0 +1,56 @@
+// RUN: mlir-opt -omp-simd-inline-boost %s | FileCheck %s
+
+func.func private @callee(%arg0: f32) -> f32
+
+// CHECK-LABEL: func.func @simd_with_call
+func.func @simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
+ omp.simd {
+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+ %val = memref.load %a[%iv] : memref<?xf32>
+ // CHECK: func.call @callee(%{{.*}}) {omp.simd_inline_boost} : (f32) -> f32
+ %res = func.call @callee(%val) : (f32) -> f32
+ memref.store %res, %a[%iv] : memref<?xf32>
+ omp.yield
+ }
+ }
+ return
+}
+
+// Calls outside omp.simd should NOT be modified.
+// CHECK-LABEL: func.func @no_simd
+func.func @no_simd(%v: f32) -> f32 {
+ // CHECK: call @callee(%{{.*}}) : (f32) -> f32
+ // CHECK-NOT: omp.simd_inline_boost
+ %res = func.call @callee(%v) : (f32) -> f32
+ return %res : f32
+}
+
+// Composite wsloop+simd: calls inside omp.simd should be boosted.
+// CHECK-LABEL: func.func @wsloop_simd_with_call
+func.func @wsloop_simd_with_call(%lb: index, %ub: index, %step: index, %a: memref<?xf32>) {
+ omp.wsloop {
+ omp.simd {
+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+ %val = memref.load %a[%iv] : memref<?xf32>
+ // CHECK: func.call @callee(%{{.*}}) {omp.simd_inline_boost} : (f32) -> f32
+ %res = func.call @callee(%val) : (f32) -> f32
+ memref.store %res, %a[%iv] : memref<?xf32>
+ omp.yield
+ }
+ } {omp.composite}
+ } {omp.composite}
+ return
+}
+
+// Calls already marked should not be re-marked (idempotent).
+// CHECK-LABEL: func.func @already_marked
+func.func @already_marked(%lb: index, %ub: index, %step: index, %v: f32) {
+ omp.simd {
+ omp.loop_nest (%iv) : index = (%lb) to (%ub) step (%step) {
+ // CHECK: func.call @callee(%{{.*}}) {omp.simd_inline_boost} : (f32) -> f32
+ %res = func.call @callee(%v) {omp.simd_inline_boost} : (f32) -> f32
+ omp.yield
+ }
+ }
+ return
+}
More information about the flang-commits
mailing list