[llvm] [InstCombine] Match intrinsic recurrences when known to be hoisted (PR #149858)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 31 06:00:27 PDT 2025
https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/149858
>From 76b5d503aa06f98f35f5d4ba2326addf6cbeaa47 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 29 Jul 2025 13:36:17 +0200
Subject: [PATCH] [InstCombine] Match intrinsic recurrences when known to be
hoisted
For value-accumulating recurrences of kind:
```
%umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
%umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
```
The binary intrinsic may be simplified into an intrinsic with init
value and the other operand, if the latter is loop-invariant:
```
%umax = call i8 @llvm.umax.i8(i8 %a, i8 %b)
```
Proofs: https://alive2.llvm.org/ce/z/ea2cVC.
Fixes: https://github.com/llvm/llvm-project/issues/145875.
---
.../InstCombine/InstCombineCalls.cpp | 48 +++++++++++++++++++
.../recurrence-binary-intrinsic.ll | 8 ++--
2 files changed, 51 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 47e017e17092b..6145ad65d7ae8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
return nullptr;
}
+// Idempotent binary intrinsics, i.e., intrinsics where `f(f(x, c), c) == f(x,
+// c)` holds.
+static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
+ switch (IID) {
+ case Intrinsic::smax:
+ case Intrinsic::smin:
+ case Intrinsic::umax:
+ case Intrinsic::umin:
+ case Intrinsic::maximum:
+ case Intrinsic::minimum:
+ case Intrinsic::maximumnum:
+ case Intrinsic::minimumnum:
+ case Intrinsic::maxnum:
+ case Intrinsic::minnum:
+ return true;
+ default:
+ return false;
+ }
+}
+
+// Attempt to simplify value-accumulating recurrences of kind:
+// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
+// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
+// And let the idempotent binary intrinsic be hoisted, when the operands are
+// known to be loop-invariant.
+static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
+ IntrinsicInst *II) {
+ PHINode *PN;
+ Value *Init, *OtherOp;
+
+ // A binary intrinsic recurrence with loop-invariant operands is equivalent to
+ // `call @llvm.binary.intrinsic(Init, OtherOp)`.
+ auto IID = II->getIntrinsicID();
+ if (!isIdempotentBinaryIntrinsic(IID) ||
+ !matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
+ !IC.getDominatorTree().dominates(OtherOp, PN))
+ return nullptr;
+
+ auto *InvariantBinaryInst =
+ IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
+ if (isa<FPMathOperator>(InvariantBinaryInst))
+ cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
+ return InvariantBinaryInst;
+}
+
static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
if (!CanReorderLanes)
return nullptr;
@@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Value *Reverse = foldReversedIntrinsicOperands(II))
return replaceInstUsesWith(*II, Reverse);
+ if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II))
+ return replaceInstUsesWith(*II, Res);
+
// Some intrinsics (like experimental_gc_statepoint) can be used in invoke
// context, so it is handled in visitCallBase and we should trigger it.
return visitCallBase(*II);
diff --git a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll
index 86e586ef0a16c..a4e247efc4d23 100644
--- a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll
@@ -236,12 +236,11 @@ define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
-; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[FMAX:%.*]] = call nnan float @llvm.maximumnum.f32(float [[A]], float [[B]])
; CHECK-NEXT: ret float [[FMAX]]
;
entry:
@@ -265,12 +264,11 @@ define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
-; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[FMIN:%.*]] = call nnan float @llvm.minimumnum.f32(float [[A]], float [[B]])
; CHECK-NEXT: ret float [[FMIN]]
;
entry:
@@ -296,7 +294,7 @@ define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) {
; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
; CHECK-NEXT: call void @use(i8 [[UMAX_ACC]])
-; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
+; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
More information about the llvm-commits
mailing list