[llvm] e9a1c82 - [SCEVExpander] Expand umin_seq using freeze

Wed May 18 00:54:23 PDT 2022

Author: Nikita Popov
Date: 2022-05-18T09:53:07+02:00
New Revision: e9a1c82d695472820c93af40cbf3d9fde2a149c6

URL: https://github.com/llvm/llvm-project/commit/e9a1c82d695472820c93af40cbf3d9fde2a149c6
DIFF: https://github.com/llvm/llvm-project/commit/e9a1c82d695472820c93af40cbf3d9fde2a149c6.diff

LOG: [SCEVExpander] Expand umin_seq using freeze

%x umin_seq %y is currently expanded to %x == 0 ? 0 : umin(%x, %y).
This patch changes the expansion to umin(%x, freeze %y) instead
(https://alive2.llvm.org/ce/z/wujUhp).

The motivation for this change are the test cases affected by
D124910, where the freeze expansion ultimately produces better
optimization results. This is largely because
`(%x umin_seq %y) == %x` is a common expansion pattern, which
reliably optimizes in freeze representation, but only sometimes
with the zero comparison (in particular, if %x == 0 can fold to
something else, we generally won't be able to cover reasonable
code from this.)

Differential Revision: https://reviews.llvm.org/D125372

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
    llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
    llvm/test/Transforms/IndVarSimplify/exit-count-select.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
index 0161fa0c813a1..260ed1a978316 100644

--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -449,7 +449,7 @@ class SCEVExpander : public SCEVVisitor<SCEVExpander, Value *> {
   const Loop *getRelevantLoop(const SCEV *);
 
   Value *expandMinMaxExpr(const SCEVNAryExpr *S, Intrinsic::ID IntrinID,
-                          Twine Name);
+                          Twine Name, bool IsSequential = false);
 
   Value *visitConstant(const SCEVConstant *S) { return S->getValue(); }
 

diff  --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 208bd846405c1..683d197c1ea2a 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1671,11 +1671,16 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
 }
 
 Value *SCEVExpander::expandMinMaxExpr(const SCEVNAryExpr *S,
-                                      Intrinsic::ID IntrinID, Twine Name) {
+                                      Intrinsic::ID IntrinID, Twine Name,
+                                      bool IsSequential) {
   Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
   Type *Ty = LHS->getType();
+  if (IsSequential)
+    LHS = Builder.CreateFreeze(LHS);
   for (int i = S->getNumOperands() - 2; i >= 0; --i) {
     Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
+    if (IsSequential && i != 0)
+      RHS = Builder.CreateFreeze(RHS);
     Value *Sel;
     if (Ty->isIntegerTy())
       Sel = Builder.CreateIntrinsic(IntrinID, {Ty}, {LHS, RHS},
@@ -1707,21 +1712,7 @@ Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
 }
 
 Value *SCEVExpander::visitSequentialUMinExpr(const SCEVSequentialUMinExpr *S) {
-  SmallVector<Value *> Ops;
-  for (const SCEV *Op : S->operands())
-    Ops.emplace_back(expand(Op));
-
-  Value *SaturationPoint =
-      MinMaxIntrinsic::getSaturationPoint(Intrinsic::umin, S->getType());
-
-  SmallVector<Value *> OpIsZero;
-  for (Value *Op : ArrayRef<Value *>(Ops).drop_back())
-    OpIsZero.emplace_back(Builder.CreateICmpEQ(Op, SaturationPoint));
-
-  Value *AnyOpIsZero = Builder.CreateLogicalOr(OpIsZero);
-
-  Value *NaiveUMin = expandMinMaxExpr(S, Intrinsic::umin, "umin");
-  return Builder.CreateSelect(AnyOpIsZero, SaturationPoint, NaiveUMin);
+  return expandMinMaxExpr(S, Intrinsic::umin, "umin", /*IsSequential*/true);
 }
 
 Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,

diff  --git a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
index 3ebafd1ef8b1b..e42e08a82d55f 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit-count-select.ll
@@ -4,14 +4,13 @@
 define i32 @logical_and_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_and_2ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[M:%.*]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[N]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i32 0, i32 [[UMIN]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
+; CHECK-NEXT:    ret i32 [[UMIN]]
 ;
 entry:
   br label %loop
@@ -29,14 +28,13 @@ exit:
 define i32 @logical_or_2ops(i32 %n, i32 %m) {
 ; CHECK-LABEL: @logical_or_2ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[M:%.*]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[M:%.*]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[N]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i32 0, i32 [[UMIN]]
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[N:%.*]])
+; CHECK-NEXT:    ret i32 [[UMIN]]
 ;
 entry:
   br label %loop
@@ -54,17 +52,15 @@ exit:
 define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: @logical_and_3ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[M:%.*]], 0
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[K:%.*]], i32 [[M]])
-; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 false, label [[LOOP]], label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[UMIN1]]
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
 entry:
   br label %loop
@@ -84,17 +80,15 @@ exit:
 define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) {
 ; CHECK-LABEL: @logical_or_3ops(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq i32 [[M:%.*]], 0
-; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[K:%.*]], i32 [[M]])
-; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT:    [[TMP0:%.*]] = freeze i32 [[K:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[M:%.*]]
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP0]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    br i1 true, label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[TMP0]]
-; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 0, i32 [[UMIN1]]
-; CHECK-NEXT:    ret i32 [[TMP3]]
+; CHECK-NEXT:    [[UMIN1:%.*]] = call i32 @llvm.umin.i32(i32 [[UMIN]], i32 [[N:%.*]])
+; CHECK-NEXT:    ret i32 [[UMIN1]]
 ;
 entry:
   br label %loop