[llvm] [InstCombine] Match intrinsic recurrences when known to be hoisted (PR #149858)

Thu Jul 31 06:00:27 PDT 2025

https://github.com/antoniofrighetto updated https://github.com/llvm/llvm-project/pull/149858

>From 76b5d503aa06f98f35f5d4ba2326addf6cbeaa47 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Tue, 29 Jul 2025 13:36:17 +0200
Subject: [PATCH] [InstCombine] Match intrinsic recurrences when known to be
 hoisted

For value-accumulating recurrences of kind:
```
  %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
  %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
```
The binary intrinsic may be simplified into an intrinsic with init
value and the other operand, if the latter is loop-invariant:
```
  %umax = call i8 @llvm.umax.i8(i8 %a, i8 %b)
```

Proofs: https://alive2.llvm.org/ce/z/ea2cVC.

Fixes: https://github.com/llvm/llvm-project/issues/145875.
---
 .../InstCombine/InstCombineCalls.cpp          | 48 +++++++++++++++++++
 .../recurrence-binary-intrinsic.ll            |  8 ++--
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 47e017e17092b..6145ad65d7ae8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1532,6 +1532,51 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
   return nullptr;
 }
 
+// Idempotent binary intrinsics, i.e., intrinsics where `f(f(x, c), c) == f(x,
+// c)` holds.
+static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID) {
+  switch (IID) {
+  case Intrinsic::smax:
+  case Intrinsic::smin:
+  case Intrinsic::umax:
+  case Intrinsic::umin:
+  case Intrinsic::maximum:
+  case Intrinsic::minimum:
+  case Intrinsic::maximumnum:
+  case Intrinsic::minimumnum:
+  case Intrinsic::maxnum:
+  case Intrinsic::minnum:
+    return true;
+  default:
+    return false;
+  }
+}
+
+// Attempt to simplify value-accumulating recurrences of kind:
+//   %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
+//   %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
+// And let the idempotent binary intrinsic be hoisted, when the operands are
+// known to be loop-invariant.
+static Value *foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC,
+                                                      IntrinsicInst *II) {
+  PHINode *PN;
+  Value *Init, *OtherOp;
+
+  // A binary intrinsic recurrence with loop-invariant operands is equivalent to
+  // `call @llvm.binary.intrinsic(Init, OtherOp)`.
+  auto IID = II->getIntrinsicID();
+  if (!isIdempotentBinaryIntrinsic(IID) ||
+      !matchSimpleBinaryIntrinsicRecurrence(II, PN, Init, OtherOp) ||
+      !IC.getDominatorTree().dominates(OtherOp, PN))
+    return nullptr;
+
+  auto *InvariantBinaryInst =
+      IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
+  if (isa<FPMathOperator>(InvariantBinaryInst))
+    cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
+  return InvariantBinaryInst;
+}
+
 static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
   if (!CanReorderLanes)
     return nullptr;
@@ -3912,6 +3957,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
   if (Value *Reverse = foldReversedIntrinsicOperands(II))
     return replaceInstUsesWith(*II, Reverse);
 
+  if (Value *Res = foldIdempotentBinaryIntrinsicRecurrence(*this, II))
+    return replaceInstUsesWith(*II, Res);
+
   // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
   // context, so it is handled in visitCallBase and we should trigger it.
   return visitCallBase(*II);
diff --git a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll
index 86e586ef0a16c..a4e247efc4d23 100644
--- a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll
+++ b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll
@@ -236,12 +236,11 @@ define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b)
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw i32 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[FMAX:%.*]] = call nnan float @llvm.maximumnum.f32(float [[A]], float [[B]])
 ; CHECK-NEXT:    ret float [[FMAX]]
 ;
 entry:
@@ -265,12 +264,11 @@ define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b)
 ; CHECK-NEXT:    br label %[[LOOP:.*]]
 ; CHECK:       [[LOOP]]:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
-; CHECK-NEXT:    [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
-; CHECK-NEXT:    [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw i32 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
 ; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[FMIN:%.*]] = call nnan float @llvm.minimumnum.f32(float [[A]], float [[B]])
 ; CHECK-NEXT:    ret float [[FMIN]]
 ;
 entry:
@@ -296,7 +294,7 @@ define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) {
 ; CHECK-NEXT:    [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
 ; CHECK-NEXT:    call void @use(i8 [[UMAX_ACC]])
-; CHECK-NEXT:    [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
+; CHECK-NEXT:    [[UMAX]] = call i8 @llvm.umax.i8(i8 [[A]], i8 [[B]])
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw i8 [[IV]], 1
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]