[llvm] [TRE] Add tests for intrinsic accumulators (PR #74226)

Joshua Cao via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 2 22:05:10 PST 2023


https://github.com/caojoshua created https://github.com/llvm/llvm-project/pull/74226

There is support for intrinsics in Instruction::isCommutative, but there
is no equivalent implementation for isAssociative. This patch builds
support for associative intrinsics with TRE as an application. TRE can
now have associative intrinsics as an accumulator. For example:
```
struct Node {
  Node *next;
  unsigned val;
}

unsigned maxval(struct Node *n) {
  if (!n) return 0;
  return std::max(n->val, maxval(n->next));
}
```
Can be transformed into:
```
unsigned maxval(struct Node *n) {
  struct Node *head = n;
  unsigned max = 0; // Identity of unsigned std::max
  while (true) {
    if (!head) return max;
    max = std::max(max, head->val);
    head = head->next;
  }
  return max;
}
```
This example results in about 5x speedup in local runs.

We conservatively only consider min/max as associative for this
patch to limit testing scope. There are probably other intrinsics that
could be considered associative. There are a few consumers of
isAssociative() that could be impacted. Testing has only required the
Reassociate pass to be updated.


>From 798f048ac6e4b3d4b6f3b5d936ec0f9ed34c6cd4 Mon Sep 17 00:00:00 2001
From: Joshua Cao <cao.joshua at yahoo.com>
Date: Sun, 14 May 2023 21:12:50 -0700
Subject: [PATCH 1/2] [TRE] Add tests for intrinsic accumulators

---
 .../TailCallElim/tre-minmax-intrinsic.ll      | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll

diff --git a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
new file mode 100644
index 0000000000000..a9ed46b201c1d
--- /dev/null
+++ b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+
+%struct.ListNode = type { i32, ptr }
+
+define noundef i32 @umin(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @umin
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 -1
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umin(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -1, %entry ] ; a null %a yields -1 (all ones)
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a ; i32 stored at the start of the node
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next ; pointer field (index 1) of the node
+  %call = tail call noundef i32 @umin(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %0, i32 %call) ; unsigned min of this node's value and the recursive result
+  br label %common.ret6
+}
+
+define noundef i32 @umax(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @umax
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umax(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ 0, %entry ] ; a null %a yields 0
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a ; i32 stored at the start of the node
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next ; pointer field (index 1) of the node
+  %call = tail call noundef i32 @umax(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.umax.i32(i32 %0, i32 %call) ; unsigned max of this node's value and the recursive result
+  br label %common.ret6
+}
+
+define noundef i32 @smin(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @smin
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 2147483647
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smin(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ 2147483647, %entry ] ; a null %a yields the largest signed i32
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a ; i32 stored at the start of the node
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next ; pointer field (index 1) of the node
+  %call = tail call noundef i32 @smin(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.smin.i32(i32 %0, i32 %call) ; signed min of this node's value and the recursive result
+  br label %common.ret6
+}
+
+define noundef i32 @smax(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @smax
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 -2147483648
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smax(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -2147483648, %entry ] ; a null %a yields the smallest signed i32
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a ; i32 stored at the start of the node
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next ; pointer field (index 1) of the node
+  %call = tail call noundef i32 @smax(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %0, i32 %call) ; signed max of this node's value and the recursive result
+  br label %common.ret6
+}
+
+declare i32 @llvm.umin.i32(i32, i32)
+declare i32 @llvm.umax.i32(i32, i32)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)

>From 549e0333e828100ec51a074ff577d75b2cbe1d66 Mon Sep 17 00:00:00 2001
From: Joshua Cao <cao.joshua at yahoo.com>
Date: Sat, 2 Dec 2023 20:05:24 -0800
Subject: [PATCH 2/2] [IR][TRE] Support associative intrinsics

There is support for intrinsics in Instruction::isCommutative, but there
is no equivalent implementation for isAssociative. This patch builds
support for associative intrinsics with TRE as an application. TRE can
now have associative intrinsics as an accumulator. For example:
```
struct Node {
  Node *next;
  unsigned val;
}

unsigned maxval(struct Node *n) {
  if (!n) return 0;
  return std::max(n->val, maxval(n->next));
}
```
Can be transformed into:
```
unsigned maxval(struct Node *n) {
  struct Node *head = n;
  unsigned max = 0; // Identity of unsigned std::max
  while (true) {
    if (!head) return max;
    max = std::max(max, head->val);
    head = head->next;
  }
  return max;
}
```
This example results in about 5x speedup in local runs.

We conservatively only consider min/max as associative for this
patch to limit testing scope. There are probably other intrinsics that
could be considered associative. There are a few consumers of
isAssociative() that could be impacted. Testing has only required the
Reassociate pass to be updated.
---
 llvm/include/llvm/IR/Constants.h              | 24 +++--
 llvm/include/llvm/IR/IntrinsicInst.h          | 12 +++
 llvm/lib/IR/Constants.cpp                     | 26 ++++++
 llvm/lib/IR/Instruction.cpp                   |  2 +
 llvm/lib/Transforms/Scalar/Reassociate.cpp    |  2 +-
 .../Scalar/TailRecursionElimination.cpp       | 26 ++++--
 .../TailCallElim/tre-minmax-intrinsic.ll      | 88 +++++++++++--------
 7 files changed, 129 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 2f7fc5652c2cd..c3f0d98871c82 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/OperandTraits.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -1095,18 +1096,25 @@ class ConstantExpr : public Constant {
   static Constant *getExactLogBase2(Constant *C);
 
   /// Return the identity constant for a binary opcode.
-  /// The identity constant C is defined as X op C = X and C op X = X for every
-  /// X when the binary operation is commutative. If the binop is not
-  /// commutative, callers can acquire the operand 1 identity constant by
-  /// setting AllowRHSConstant to true. For example, any shift has a zero
-  /// identity constant for operand 1: X shift 0 = X.
-  /// If this is a fadd/fsub operation and we don't care about signed zeros,
-  /// then setting NSZ to true returns the identity +0.0 instead of -0.0.
-  /// Return nullptr if the operator does not have an identity constant.
+  /// If the binop is not commutative, callers can acquire the operand 1
+  /// identity constant by setting AllowRHSConstant to true. For example, any
+  /// shift has a zero identity constant for operand 1: X shift 0 = X. If this
+  /// is a fadd/fsub operation and we don't care about signed zeros, then
+  /// setting NSZ to true returns the identity +0.0 instead of -0.0. Return
+  /// nullptr if the operator does not have an identity constant.
   static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty,
                                     bool AllowRHSConstant = false,
                                     bool NSZ = false);
 
+  static Constant *getIntrinsicIdentity(Intrinsic::ID, Type *Ty);
+
+  /// Return the identity constant C (X op C = X and C op X = X) for a binary
+  /// operator or intrinsic call, or nullptr if there is none. AllowRHSConstant
+  /// and NSZ are forwarded to getBinOpIdentity and ignored for intrinsics.
+  static Constant *getIdentity(Instruction *I, Type *Ty,
+                               bool AllowRHSConstant = false,
+                               bool NSZ = false);
+
   /// Return the absorbing element for the given binary
   /// operation, i.e. a constant C such that X op C = C and C op X = C for
   /// every X.  For example, this returns zero for integer multiplication.
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index c26ecef6eaaee..8940bebd2c9a2 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -55,6 +55,18 @@ class IntrinsicInst : public CallInst {
     return getCalledFunction()->getIntrinsicID();
   }
 
+  bool isAssociative() const {
+    switch (getIntrinsicID()) {
+    case Intrinsic::smax:
+    case Intrinsic::smin:
+    case Intrinsic::umax:
+    case Intrinsic::umin:
+      return true; // op(op(A, B), C) == op(A, op(B, C)) for integer min/max
+    default:
+      return false; // every other intrinsic is reported as non-associative
+    }
+  }
+
   /// Return true if swapping the first two arguments to the intrinsic produces
   /// the same result.
   bool isCommutative() const {
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index bc55d5b485271..872e0a586f137 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2556,6 +2556,32 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
   }
 }
 
+Constant *ConstantExpr::getIntrinsicIdentity(Intrinsic::ID ID, Type *Ty) {
+  switch (ID) {
+  case Intrinsic::umax: // umax(X, 0) == X
+    return Constant::getNullValue(Ty);
+  case Intrinsic::umin: // umin(X, all-ones) == X
+    return Constant::getAllOnesValue(Ty);
+  case Intrinsic::smax: // smax(X, signed-min) == X; scalar width, vector-safe
+    return Constant::getIntegerValue(
+        Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
+  case Intrinsic::smin: // smin(X, signed-max) == X; scalar width, vector-safe
+    return Constant::getIntegerValue(
+        Ty, APInt::getSignedMaxValue(Ty->getScalarSizeInBits()));
+  default: // No identity is known for this intrinsic.
+    return nullptr;
+  }
+}
+
+Constant *ConstantExpr::getIdentity(Instruction *I, Type *Ty,
+                                    bool AllowRHSConstant, bool NSZ) {
+  if (I->isBinaryOp()) // Binary operators: defer to the opcode-based lookup.
+    return getBinOpIdentity(I->getOpcode(), Ty, AllowRHSConstant, NSZ);
+  if (auto *II = dyn_cast<IntrinsicInst>(I)) // AllowRHSConstant/NSZ unused.
+    return getIntrinsicIdentity(II->getIntrinsicID(), Ty);
+  return nullptr; // Neither a binary operator nor an intrinsic call.
+}
+
 Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
   switch (Opcode) {
   default:
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 4b5349856b8d7..a6e43de5978b5 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1091,6 +1091,8 @@ const DebugLoc &Instruction::getStableDebugLoc() const {
 }
 
 bool Instruction::isAssociative() const {
+  if (auto *II = dyn_cast<IntrinsicInst>(this))
+    return II->isAssociative();
   unsigned Opcode = getOpcode();
   if (isAssociative(Opcode))
     return true;
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 0d55c72e407e9..d3f6d24d90961 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2554,7 +2554,7 @@ ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
   // Make a "pairmap" of how often each operand pair occurs.
   for (BasicBlock *BI : RPOT) {
     for (Instruction &I : *BI) {
-      if (!I.isAssociative())
+      if (!I.isAssociative() || !I.isBinaryOp())
         continue;
 
       // Ignore nodes that aren't at the root of trees.
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 7b850f05bec11..a8464f5ee9ce0 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -369,12 +369,26 @@ static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
   if (!I->isAssociative() || !I->isCommutative())
     return false;
 
-  assert(I->getNumOperands() == 2 &&
-         "Associative/commutative operations should have 2 args!");
+  Value *LHS;
+  Value *RHS;
+  if (I->isBinaryOp()) {
+    LHS = I->getOperand(0);
+    RHS = I->getOperand(1);
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    // Accumulators must have an identity.
+    if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), I->getType()))
+      return false;
+    // Call arguments are operands 0..N-1; the callee is the last operand.
+    LHS = II->getArgOperand(0);
+    RHS = II->getArgOperand(1);
+  } else {
+    llvm_unreachable(
+        "commutative operations must be either a binary or intrinsic op");
+  }
+
 
   // Exactly one operand should be the result of the call instruction.
-  if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
-      (I->getOperand(0) != CI && I->getOperand(1) != CI))
+  if ((LHS == CI && RHS == CI) || (LHS != CI && RHS != CI))
     return false;
 
   // The only user of this instruction we allow is a single return instruction.
@@ -569,8 +583,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
   for (pred_iterator PI = PB; PI != PE; ++PI) {
     BasicBlock *P = *PI;
     if (P == &F.getEntryBlock()) {
-      Constant *Identity = ConstantExpr::getBinOpIdentity(
-          AccRecInstr->getOpcode(), AccRecInstr->getType());
+      Constant *Identity =
+          ConstantExpr::getIdentity(AccRecInstr, AccRecInstr->getType());
       AccPN->addIncoming(Identity, P);
     } else {
       AccPN->addIncoming(AccPN, P);
diff --git a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
index a9ed46b201c1d..e9e0eabbabaa2 100644
--- a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
+++ b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
@@ -7,17 +7,21 @@ define noundef i32 @umin(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @umin
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 -1
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.umin.i32(i32 -1, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umin(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null
@@ -40,17 +44,21 @@ define noundef i32 @umax(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @umax
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.umax.i32(i32 0, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umax(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null
@@ -73,17 +81,21 @@ define noundef i32 @smin(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @smin
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 2147483647, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 2147483647
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.smin.i32(i32 2147483647, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smin(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null
@@ -106,17 +118,21 @@ define noundef i32 @smax(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @smax
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 -2147483648
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.smax.i32(i32 -2147483648, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smax(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null



More information about the llvm-commits mailing list