[llvm] [IR][TRE] Support associative intrinsics (PR #74226)

Joshua Cao via llvm-commits llvm-commits at lists.llvm.org
Sun Dec 3 15:45:49 PST 2023


https://github.com/caojoshua updated https://github.com/llvm/llvm-project/pull/74226

>From c31bb8864f9327ab7c101ad0179ef1c1b893e54f Mon Sep 17 00:00:00 2001
From: Joshua Cao <cao.joshua at yahoo.com>
Date: Sun, 14 May 2023 21:12:50 -0700
Subject: [PATCH 1/4] [TRE] Add tests for intrinsic accumulators

---
 .../TailCallElim/tre-minmax-intrinsic.ll      | 141 ++++++++++++++++++
 1 file changed, 141 insertions(+)
 create mode 100644 llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll

diff --git a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
new file mode 100644
index 0000000000000..a9ed46b201c1d
--- /dev/null
+++ b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
@@ -0,0 +1,141 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -passes=tailcallelim -verify-dom-info -S | FileCheck %s
+
+%struct.ListNode = type { i32, ptr }
+
+define noundef i32 @umin(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @umin
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 -1
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umin(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -1, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call noundef i32 @umin(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %0, i32 %call)
+  br label %common.ret6
+}
+
+define noundef i32 @umax(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @umax
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 0
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umax(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ 0, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call noundef i32 @umax(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.umax.i32(i32 %0, i32 %call)
+  br label %common.ret6
+}
+
+define noundef i32 @smin(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @smin
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 2147483647
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smin(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ 2147483647, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call noundef i32 @smin(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.smin.i32(i32 %0, i32 %call)
+  br label %common.ret6
+}
+
+define noundef i32 @smax(ptr noundef readonly %a) {
+; CHECK-LABEL: define noundef i32 @smax
+; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 -2147483648
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smax(ptr noundef [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -2147483648, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call noundef i32 @smax(ptr noundef %1)
+  %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %0, i32 %call)
+  br label %common.ret6
+}
+
+declare i32 @llvm.umin.i32(i32, i32)
+declare i32 @llvm.umax.i32(i32, i32)
+declare i32 @llvm.smin.i32(i32, i32)
+declare i32 @llvm.smax.i32(i32, i32)

>From 91b4d52a69ad7df8e652f02b0b9d98c8e45f94fe Mon Sep 17 00:00:00 2001
From: Joshua Cao <cao.joshua at yahoo.com>
Date: Sat, 2 Dec 2023 20:05:24 -0800
Subject: [PATCH 2/4] [IR][TRE] Support associative intrinsics

There is support for intrinsics in Instruction::isCommutative, but there
is no equivalent implementation for isAssociative. This patch builds
support for associative intrinsics with TRE as an application. TRE can
now have associative intrinsics as an accumulator. For example:
```
struct Node {
  Node *next;
  unsigned val;
}

unsigned maxval(struct Node *n) {
  if (!n) return 0;
  return std::max(n->val, maxval(n->next));
}
```
Can be transformed into:
```
unsigned maxval(struct Node *n) {
  struct Node *head = n;
  unsigned max = 0; // Identity of unsigned std::max
  while (true) {
    if (!head) return max;
    max = std::max(max, head->val);
    head = head->next;
  }
  return max;
}
```
This example results in about 5x speedup in local runs.

We conservatively consider only the min/max intrinsics as associative in
this patch to limit testing scope. There are probably other intrinsics
that could be considered associative. There are a few consumers of
isAssociative() that could be impacted. Testing has only required the
Reassociate pass to be updated.
---
 llvm/include/llvm/IR/Constants.h              | 24 +++--
 llvm/include/llvm/IR/IntrinsicInst.h          | 12 +++
 llvm/lib/IR/Constants.cpp                     | 26 ++++++
 llvm/lib/IR/Instruction.cpp                   |  2 +
 llvm/lib/Transforms/Scalar/Reassociate.cpp    |  2 +-
 .../Scalar/TailRecursionElimination.cpp       | 26 ++++--
 .../TailCallElim/tre-minmax-intrinsic.ll      | 88 +++++++++++--------
 7 files changed, 129 insertions(+), 51 deletions(-)

diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index 2f7fc5652c2cd..c3f0d98871c82 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -27,6 +27,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/OperandTraits.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -1095,18 +1096,25 @@ class ConstantExpr : public Constant {
   static Constant *getExactLogBase2(Constant *C);
 
   /// Return the identity constant for a binary opcode.
-  /// The identity constant C is defined as X op C = X and C op X = X for every
-  /// X when the binary operation is commutative. If the binop is not
-  /// commutative, callers can acquire the operand 1 identity constant by
-  /// setting AllowRHSConstant to true. For example, any shift has a zero
-  /// identity constant for operand 1: X shift 0 = X.
-  /// If this is a fadd/fsub operation and we don't care about signed zeros,
-  /// then setting NSZ to true returns the identity +0.0 instead of -0.0.
-  /// Return nullptr if the operator does not have an identity constant.
+  /// If the binop is not commutative, callers can acquire the operand 1
+  /// identity constant by setting AllowRHSConstant to true. For example, any
+  /// shift has a zero identity constant for operand 1: X shift 0 = X. If this
+  /// is a fadd/fsub operation and we don't care about signed zeros, then
+  /// setting NSZ to true returns the identity +0.0 instead of -0.0. Return
+  /// nullptr if the operator does not have an identity constant.
   static Constant *getBinOpIdentity(unsigned Opcode, Type *Ty,
                                     bool AllowRHSConstant = false,
                                     bool NSZ = false);
 
+  static Constant *getIntrinsicIdentity(Intrinsic::ID, Type *Ty);
+
+  /// Return the identity constant for a binary or intrinsic Instruction.
+  /// The identity constant C is defined as X op C = X and C op X = X where C
+  /// and X are the first two operands, and the operation is commutative.
+  static Constant *getIdentity(Instruction *I, Type *Ty,
+                                    bool AllowRHSConstant = false,
+                                    bool NSZ = false);
+
   /// Return the absorbing element for the given binary
   /// operation, i.e. a constant C such that X op C = C and C op X = C for
   /// every X.  For example, this returns zero for integer multiplication.
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index c26ecef6eaaee..8940bebd2c9a2 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -55,6 +55,18 @@ class IntrinsicInst : public CallInst {
     return getCalledFunction()->getIntrinsicID();
   }
 
+  bool isAssociative() const {
+    switch (getIntrinsicID()) {
+    case Intrinsic::smax:
+    case Intrinsic::smin:
+    case Intrinsic::umax:
+    case Intrinsic::umin:
+      return true;
+    default:
+      return false;
+    }
+  }
+
   /// Return true if swapping the first two arguments to the intrinsic produces
   /// the same result.
   bool isCommutative() const {
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index bc55d5b485271..872e0a586f137 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2556,6 +2556,32 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
   }
 }
 
+Constant *ConstantExpr::getIntrinsicIdentity(Intrinsic::ID ID, Type *Ty) {
+  switch (ID) {
+  case Intrinsic::umax:
+      return Constant::getNullValue(Ty);
+  case Intrinsic::umin:
+      return Constant::getAllOnesValue(Ty);
+  case Intrinsic::smax:
+      return Constant::getIntegerValue(
+          Ty, APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
+  case Intrinsic::smin:
+      return Constant::getIntegerValue(
+          Ty, APInt::getSignedMaxValue(Ty->getIntegerBitWidth()));
+  default:
+      return nullptr;
+  }
+}
+
+Constant *ConstantExpr::getIdentity(Instruction *I, Type *Ty,
+                                    bool AllowRHSConstant, bool NSZ) {
+  if (I->isBinaryOp())
+      return getBinOpIdentity(I->getOpcode(), Ty, AllowRHSConstant, NSZ);
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+      return getIntrinsicIdentity(II->getIntrinsicID(), Ty);
+  return nullptr;
+}
+
 Constant *ConstantExpr::getBinOpAbsorber(unsigned Opcode, Type *Ty) {
   switch (Opcode) {
   default:
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 4b5349856b8d7..a6e43de5978b5 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -1091,6 +1091,8 @@ const DebugLoc &Instruction::getStableDebugLoc() const {
 }
 
 bool Instruction::isAssociative() const {
+  if (auto *II = dyn_cast<IntrinsicInst>(this))
+    return II->isAssociative();
   unsigned Opcode = getOpcode();
   if (isAssociative(Opcode))
     return true;
diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp
index 0d55c72e407e9..d3f6d24d90961 100644
--- a/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -2554,7 +2554,7 @@ ReassociatePass::BuildPairMap(ReversePostOrderTraversal<Function *> &RPOT) {
   // Make a "pairmap" of how often each operand pair occurs.
   for (BasicBlock *BI : RPOT) {
     for (Instruction &I : *BI) {
-      if (!I.isAssociative())
+      if (!I.isAssociative() || !I.isBinaryOp())
         continue;
 
       // Ignore nodes that aren't at the root of trees.
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 7b850f05bec11..a8464f5ee9ce0 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -369,12 +369,26 @@ static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
   if (!I->isAssociative() || !I->isCommutative())
     return false;
 
-  assert(I->getNumOperands() == 2 &&
-         "Associative/commutative operations should have 2 args!");
+  Value *LHS;
+  Value *RHS;
+  if (I->isBinaryOp()) {
+    LHS = I->getOperand(0);
+    RHS = I->getOperand(1);
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    // Accumulators must have an identity.
+    if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), I->getType()))
+      return false;
+    // 0'th operand is the intrinsic function
+    LHS = I->getOperand(1);
+    RHS = I->getOperand(2);
+  } else {
+    llvm_unreachable(
+        "commutative operations must be either a binary or intrinsic op");
+  }
+
 
   // Exactly one operand should be the result of the call instruction.
-  if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
-      (I->getOperand(0) != CI && I->getOperand(1) != CI))
+  if ((LHS == CI && RHS == CI) || (LHS != CI && RHS != CI))
     return false;
 
   // The only user of this instruction we allow is a single return instruction.
@@ -569,8 +583,8 @@ void TailRecursionEliminator::insertAccumulator(Instruction *AccRecInstr) {
   for (pred_iterator PI = PB; PI != PE; ++PI) {
     BasicBlock *P = *PI;
     if (P == &F.getEntryBlock()) {
-      Constant *Identity = ConstantExpr::getBinOpIdentity(
-          AccRecInstr->getOpcode(), AccRecInstr->getType());
+      Constant *Identity =
+          ConstantExpr::getIdentity(AccRecInstr, AccRecInstr->getType());
       AccPN->addIncoming(Identity, P);
     } else {
       AccPN->addIncoming(AccPN, P);
diff --git a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
index a9ed46b201c1d..e9e0eabbabaa2 100644
--- a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
+++ b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
@@ -7,17 +7,21 @@ define noundef i32 @umin(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @umin
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 -1
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.umin.i32(i32 -1, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umin(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null
@@ -40,17 +44,21 @@ define noundef i32 @umax(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @umax
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 0
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.umax.i32(i32 0, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @umax(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.umax.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null
@@ -73,17 +81,21 @@ define noundef i32 @smin(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @smin
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 2147483647, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 2147483647
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.smin.i32(i32 2147483647, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smin(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.smin.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null
@@ -106,17 +118,21 @@ define noundef i32 @smax(ptr noundef readonly %a) {
 ; CHECK-LABEL: define noundef i32 @smax
 ; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
 ; CHECK:       common.ret6:
-; CHECK-NEXT:    ret i32 -2147483648
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.smax.i32(i32 -2147483648, i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
-; CHECK-NEXT:    [[CALL:%.*]] = tail call noundef i32 @smax(ptr noundef [[TMP1]])
-; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[CALL]])
-; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.smax.i32(i32 [[TMP0]], i32 [[ACCUMULATOR_TR]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
 ;
 entry:
   %tobool.not = icmp eq ptr %a, null

>From bf1b400ad15a2cbb6554a20782a94965275d8913 Mon Sep 17 00:00:00 2001
From: Joshua Cao <cao.joshua at yahoo.com>
Date: Sat, 2 Dec 2023 22:40:12 -0800
Subject: [PATCH 3/4] fix formatting

---
 llvm/include/llvm/IR/Constants.h               |  3 +--
 llvm/lib/IR/Constants.cpp                      | 18 +++++++++---------
 .../Scalar/TailRecursionElimination.cpp        |  1 -
 3 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h
index c3f0d98871c82..0b9f89830b79c 100644
--- a/llvm/include/llvm/IR/Constants.h
+++ b/llvm/include/llvm/IR/Constants.h
@@ -1112,8 +1112,7 @@ class ConstantExpr : public Constant {
   /// The identity constant C is defined as X op C = X and C op X = X where C
   /// and X are the first two operands, and the operation is commutative.
   static Constant *getIdentity(Instruction *I, Type *Ty,
-                                    bool AllowRHSConstant = false,
-                                    bool NSZ = false);
+                               bool AllowRHSConstant = false, bool NSZ = false);
 
   /// Return the absorbing element for the given binary
   /// operation, i.e. a constant C such that X op C = C and C op X = C for
diff --git a/llvm/lib/IR/Constants.cpp b/llvm/lib/IR/Constants.cpp
index 872e0a586f137..a38b912164b13 100644
--- a/llvm/lib/IR/Constants.cpp
+++ b/llvm/lib/IR/Constants.cpp
@@ -2559,26 +2559,26 @@ Constant *ConstantExpr::getBinOpIdentity(unsigned Opcode, Type *Ty,
 Constant *ConstantExpr::getIntrinsicIdentity(Intrinsic::ID ID, Type *Ty) {
   switch (ID) {
   case Intrinsic::umax:
-      return Constant::getNullValue(Ty);
+    return Constant::getNullValue(Ty);
   case Intrinsic::umin:
-      return Constant::getAllOnesValue(Ty);
+    return Constant::getAllOnesValue(Ty);
   case Intrinsic::smax:
-      return Constant::getIntegerValue(
-          Ty, APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
+    return Constant::getIntegerValue(
+        Ty, APInt::getSignedMinValue(Ty->getIntegerBitWidth()));
   case Intrinsic::smin:
-      return Constant::getIntegerValue(
-          Ty, APInt::getSignedMaxValue(Ty->getIntegerBitWidth()));
+    return Constant::getIntegerValue(
+        Ty, APInt::getSignedMaxValue(Ty->getIntegerBitWidth()));
   default:
-      return nullptr;
+    return nullptr;
   }
 }
 
 Constant *ConstantExpr::getIdentity(Instruction *I, Type *Ty,
                                     bool AllowRHSConstant, bool NSZ) {
   if (I->isBinaryOp())
-      return getBinOpIdentity(I->getOpcode(), Ty, AllowRHSConstant, NSZ);
+    return getBinOpIdentity(I->getOpcode(), Ty, AllowRHSConstant, NSZ);
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
-      return getIntrinsicIdentity(II->getIntrinsicID(), Ty);
+    return getIntrinsicIdentity(II->getIntrinsicID(), Ty);
   return nullptr;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index a8464f5ee9ce0..f031298d5e8c7 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -386,7 +386,6 @@ static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
         "commutative operations must be either a binary or intrinsic op");
   }
 
-
   // Exactly one operand should be the result of the call instruction.
   if ((LHS == CI && RHS == CI) || (LHS != CI && RHS != CI))
     return false;

>From fcc431af293dca4aa98eb80fefe00cbfc836bce9 Mon Sep 17 00:00:00 2001
From: Joshua Cao <cao.joshua at yahoo.com>
Date: Sun, 3 Dec 2023 14:58:36 -0800
Subject: [PATCH 4/4] * Look at correct operands for intrinsics instruction *
 Add test for commutative, non-associative intrinsic * Add tests where call
 output is accumulator 0'th input

---
 .../Scalar/TailRecursionElimination.cpp       |  19 +-
 .../TailCallElim/accum_recursion.ll           |  40 +++-
 .../TailCallElim/tre-minmax-intrinsic.ll      | 180 ++++++++++++++++--
 3 files changed, 209 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index f031298d5e8c7..c6e8505d5ab4b 100644
--- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -369,25 +369,18 @@ static bool canTransformAccumulatorRecursion(Instruction *I, CallInst *CI) {
   if (!I->isAssociative() || !I->isCommutative())
     return false;
 
-  Value *LHS;
-  Value *RHS;
-  if (I->isBinaryOp()) {
-    LHS = I->getOperand(0);
-    RHS = I->getOperand(1);
-  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+  assert(I->getNumOperands() >= 2 &&
+         "Associative/commutative operations should have at least 2 args!");
+
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
     // Accumulators must have an identity.
     if (!ConstantExpr::getIntrinsicIdentity(II->getIntrinsicID(), I->getType()))
       return false;
-    // 0'th operand is the intrinsic function
-    LHS = I->getOperand(1);
-    RHS = I->getOperand(2);
-  } else {
-    llvm_unreachable(
-        "commutative operations must be either a binary or intrinsic op");
   }
 
   // Exactly one operand should be the result of the call instruction.
-  if ((LHS == CI && RHS == CI) || (LHS != CI && RHS != CI))
+  if ((I->getOperand(0) == CI && I->getOperand(1) == CI) ||
+      (I->getOperand(0) != CI && I->getOperand(1) != CI))
     return false;
 
   // The only user of this instruction we allow is a single return instruction.
diff --git a/llvm/test/Transforms/TailCallElim/accum_recursion.ll b/llvm/test/Transforms/TailCallElim/accum_recursion.ll
index 6ddb321e66649..c093f026e4678 100644
--- a/llvm/test/Transforms/TailCallElim/accum_recursion.ll
+++ b/llvm/test/Transforms/TailCallElim/accum_recursion.ll
@@ -78,7 +78,7 @@ define i64 @test3_fib(i64 %n) nounwind readnone {
 ; CHECK-NEXT:    ]
 ; CHECK:       bb1:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N_TR]], -1
-; CHECK-NEXT:    [[RECURSE1:%.*]] = tail call i64 @test3_fib(i64 [[TMP0]]) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT:    [[RECURSE1:%.*]] = tail call i64 @test3_fib(i64 [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; CHECK-NEXT:    [[TMP1]] = add i64 [[N_TR]], -2
 ; CHECK-NEXT:    [[ACCUMULATE]] = add nsw i64 [[ACCUMULATOR_TR]], [[RECURSE1]]
 ; CHECK-NEXT:    br label [[TAILRECURSE]]
@@ -290,3 +290,41 @@ return:
   %retval.0 = phi i32 [ %accumulate1, %if.then2 ], [ %accumulate2, %if.end3 ], [ 0, %entry ]
   ret i32 %retval.0
 }
+
+%struct.ListNode = type { i32, ptr }
+
+; Negative test: llvm.sadd.sat is commutative but not associative, so TRE must not use it as an accumulator and the recursive call has to remain.
+define i32 @test_non_associative_sadd_sat(ptr %a) local_unnamed_addr {
+; CHECK-LABEL: define i32 @test_non_associative_sadd_sat(
+; CHECK-SAME: ptr [[A:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END:%.*]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    ret i32 -1
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @test_non_associative_sadd_sat(ptr [[TMP1]])
+; CHECK-NEXT:    [[DOTSROA_SPECULATED:%.*]] = tail call i32 @llvm.sadd.sat.i32(i32 [[TMP0]], i32 [[CALL]])
+; CHECK-NEXT:    ret i32 [[DOTSROA_SPECULATED]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -1, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call i32 @test_non_associative_sadd_sat(ptr %1)
+  %.sroa.speculated = tail call i32 @llvm.sadd.sat.i32(i32 %0, i32 %call)
+  br label %common.ret6
+}
+
+declare i32 @llvm.sadd.sat.i32(i32, i32)
diff --git a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
index e9e0eabbabaa2..f8d9f9f9fbcb7 100644
--- a/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
+++ b/llvm/test/Transforms/TailCallElim/tre-minmax-intrinsic.ll
@@ -3,9 +3,9 @@
 
 %struct.ListNode = type { i32, ptr }
 
-define noundef i32 @umin(ptr noundef readonly %a) {
-; CHECK-LABEL: define noundef i32 @umin
-; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+define i32 @umin(ptr readonly %a) {
+; CHECK-LABEL: define i32 @umin
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
 ; CHECK:       tailrecurse:
@@ -35,14 +35,51 @@ if.end:                                           ; preds = %entry
   %0 = load i32, ptr %a
   %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
   %1 = load ptr, ptr %next
-  %call = tail call noundef i32 @umin(ptr noundef %1)
+  %call = tail call i32 @umin(ptr %1)
   %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %0, i32 %call)
   br label %common.ret6
 }
 
-define noundef i32 @umax(ptr noundef readonly %a) {
-; CHECK-LABEL: define noundef i32 @umax
-; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+define i32 @umin2(ptr readonly %a) {
+; CHECK-LABEL: define i32 @umin2
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.umin.i32(i32 [[ACCUMULATOR_TR]], i32 -1)
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.umin.i32(i32 [[ACCUMULATOR_TR]], i32 [[TMP0]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -1, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call i32 @umin2(ptr %1)
+  %.sroa.speculated = tail call i32 @llvm.umin.i32(i32 %call, i32 %0)
+  br label %common.ret6
+}
+
+define i32 @umax(ptr readonly %a) {
+; CHECK-LABEL: define i32 @umax
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
 ; CHECK:       tailrecurse:
@@ -72,14 +109,51 @@ if.end:                                           ; preds = %entry
   %0 = load i32, ptr %a
   %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
   %1 = load ptr, ptr %next
-  %call = tail call noundef i32 @umax(ptr noundef %1)
+  %call = tail call i32 @umax(ptr %1)
   %.sroa.speculated = tail call i32 @llvm.umax.i32(i32 %0, i32 %call)
   br label %common.ret6
 }
 
-define noundef i32 @smin(ptr noundef readonly %a) {
-; CHECK-LABEL: define noundef i32 @smin
-; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+define i32 @umax2(ptr readonly %a) {
+; CHECK-LABEL: define i32 @umax2
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.umax.i32(i32 [[ACCUMULATOR_TR]], i32 0)
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.umax.i32(i32 [[ACCUMULATOR_TR]], i32 [[TMP0]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ 0, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call i32 @umax2(ptr %1)
+  %.sroa.speculated = tail call i32 @llvm.umax.i32(i32 %call, i32 %0)
+  br label %common.ret6
+}
+
+define i32 @smin(ptr readonly %a) {
+; CHECK-LABEL: define i32 @smin
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
 ; CHECK:       tailrecurse:
@@ -109,14 +183,51 @@ if.end:                                           ; preds = %entry
   %0 = load i32, ptr %a
   %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
   %1 = load ptr, ptr %next
-  %call = tail call noundef i32 @smin(ptr noundef %1)
+  %call = tail call i32 @smin(ptr %1)
   %.sroa.speculated = tail call i32 @llvm.smin.i32(i32 %0, i32 %call)
   br label %common.ret6
 }
 
-define noundef i32 @smax(ptr noundef readonly %a) {
-; CHECK-LABEL: define noundef i32 @smax
-; CHECK-SAME: (ptr noundef readonly [[A:%.*]]) {
+define i32 @smin2(ptr readonly %a) {
+; CHECK-LABEL: define i32 @smin2
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ 2147483647, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ACCUMULATOR_TR]], i32 2147483647)
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.smin.i32(i32 [[ACCUMULATOR_TR]], i32 [[TMP0]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ 2147483647, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call i32 @smin2(ptr %1)
+  %.sroa.speculated = tail call i32 @llvm.smin.i32(i32 %call, i32 %0)
+  br label %common.ret6
+}
+
+define i32 @smax(ptr readonly %a) {
+; CHECK-LABEL: define i32 @smax
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
 ; CHECK:       tailrecurse:
@@ -146,11 +257,48 @@ if.end:                                           ; preds = %entry
   %0 = load i32, ptr %a
   %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
   %1 = load ptr, ptr %next
-  %call = tail call noundef i32 @smax(ptr noundef %1)
+  %call = tail call i32 @smax(ptr %1)
   %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %0, i32 %call)
   br label %common.ret6
 }
 
+define i32 @smax2(ptr readonly %a) {
+; CHECK-LABEL: define i32 @smax2
+; CHECK-SAME: (ptr readonly [[A:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[TAILRECURSE:%.*]]
+; CHECK:       tailrecurse:
+; CHECK-NEXT:    [[ACCUMULATOR_TR:%.*]] = phi i32 [ -2147483648, [[ENTRY:%.*]] ], [ [[DOTSROA_SPECULATED:%.*]], [[IF_END:%.*]] ]
+; CHECK-NEXT:    [[A_TR:%.*]] = phi ptr [ [[A]], [[ENTRY]] ], [ [[TMP1:%.*]], [[IF_END]] ]
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq ptr [[A_TR]], null
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[COMMON_RET6:%.*]], label [[IF_END]]
+; CHECK:       common.ret6:
+; CHECK-NEXT:    [[ACCUMULATOR_RET_TR:%.*]] = tail call i32 @llvm.smax.i32(i32 [[ACCUMULATOR_TR]], i32 -2147483648)
+; CHECK-NEXT:    ret i32 [[ACCUMULATOR_RET_TR]]
+; CHECK:       if.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_TR]], align 4
+; CHECK-NEXT:    [[NEXT:%.*]] = getelementptr inbounds [[STRUCT_LISTNODE:%.*]], ptr [[A_TR]], i64 0, i32 1
+; CHECK-NEXT:    [[TMP1]] = load ptr, ptr [[NEXT]], align 8
+; CHECK-NEXT:    [[DOTSROA_SPECULATED]] = tail call i32 @llvm.smax.i32(i32 [[ACCUMULATOR_TR]], i32 [[TMP0]])
+; CHECK-NEXT:    br label [[TAILRECURSE]]
+;
+entry:
+  %tobool.not = icmp eq ptr %a, null
+  br i1 %tobool.not, label %common.ret6, label %if.end
+
+common.ret6:                                      ; preds = %entry, %if.end
+  %common.ret6.op = phi i32 [ %.sroa.speculated, %if.end ], [ -2147483648, %entry ]
+  ret i32 %common.ret6.op
+
+if.end:                                           ; preds = %entry
+  %0 = load i32, ptr %a
+  %next = getelementptr inbounds %struct.ListNode, ptr %a, i64 0, i32 1
+  %1 = load ptr, ptr %next
+  %call = tail call i32 @smax2(ptr %1)
+  %.sroa.speculated = tail call i32 @llvm.smax.i32(i32 %call, i32 %0)
+  br label %common.ret6
+}
+
 declare i32 @llvm.umin.i32(i32, i32)
 declare i32 @llvm.umax.i32(i32, i32)
 declare i32 @llvm.smin.i32(i32, i32)



More information about the llvm-commits mailing list