[llvm] [LoopPeel][TTI][AMDGPU] Flag to control aggressiveness of compare elimination peeling (PR #122780)

Lucas Ramirez via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 13 12:05:43 PST 2025


https://github.com/lucas-rami created https://github.com/llvm/llvm-project/pull/122780

This adds a flag to `TargetTransformInfo::PeelingPreferences` that controls loop peeling behavior's aggressiveness with respect to the elimination of compares inside loops. The current heuristic is quite aggressive; in particular, for a condition tree it can produce a peel count equal to the maximum peel count that eliminates at least one leaf of the condition tree, even when other leaves possibly remain unknown. This can lead to loop peeling without an actual reduction in the number of conditional branches inside the loop. While this is fine in most cases, on some targets (e.g., AMDGPU) the increase in loop size without a reduction in number of conditional branches can yield suboptimal performance ([see motivating example](https://discourse.llvm.org/t/unexpected-peeling-decision-when-trying-to-eliminate-compares-inside-loop/82866)).

The flag is set to true by default, meaning that for all backends but AMDGPU this does not change loop peeling behavior.

Fixes SWDEV-485683. 

>From 9252ef3c609f23b74617388e54b701632f0ffe7d Mon Sep 17 00:00:00 2001
From: Lucas Ramirez <lucas.rami at proton.me>
Date: Mon, 13 Jan 2025 20:43:29 +0100
Subject: [PATCH] Add AggresiveEliminateCompares flag to peeling preferences

---
 .../llvm/Analysis/TargetTransformInfo.h       |   4 +
 llvm/include/llvm/CodeGen/BasicTTIImpl.h      |   1 +
 .../AMDGPU/AMDGPUTargetTransformInfo.cpp      |   1 +
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  95 +++--
 .../LoopUnroll/peel-aggressive-comp-elim.ll   | 347 ++++++++++++++++++
 5 files changed, 426 insertions(+), 22 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/peel-aggressive-comp-elim.ll

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 752313ab15858c..51c9bad9bcc487 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -663,6 +663,10 @@ class TargetTransformInfo {
     /// If the value is true the peeling cost model can decide to peel only
     /// some iterations and in this case it will set this to false.
     bool PeelProfiledIterations;
+    /// Allow aggressive peeling of loop iterations to eliminate compares.
+    /// When true, the elimination of any comparison in the loop is cause for
+    /// increasing the peel count.
+    bool AggresiveEliminateCompares;
   };
 
   /// Get target-customized preferences for the generic loop peeling
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index c9f142d64ae9e4..156fade32508c9 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -698,6 +698,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     PP.AllowPeeling = true;
     PP.AllowLoopNestsPeeling = false;
     PP.PeelProfiledIterations = true;
+    PP.AggresiveEliminateCompares = true;
   }
 
   bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 5160851f8c4424..538559ef1623f3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -272,6 +272,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 void AMDGPUTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::PeelingPreferences &PP) {
   BaseT::getPeelingPreferences(L, SE, PP);
+  PP.AggresiveEliminateCompares = false;
 }
 
 int64_t AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const {
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 9a24c1b0d03de7..0b6b077df2aa1b 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -64,6 +64,10 @@ static cl::opt<bool>
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Allows loop nests to be peeled."));
 
+static cl::opt<bool> UnrollAggressiveEliminateCompares(
+    "unroll-allow-aggressive-eliminate-compares", cl::init(true), cl::Hidden,
+    cl::desc("Allow aggressive peeling to eliminate compares."));
+
 static cl::opt<unsigned> UnrollPeelMaxCount(
     "unroll-peel-max-count", cl::init(7), cl::Hidden,
     cl::desc("Max average trip count which will cause loop peeling."));
@@ -338,7 +342,12 @@ static unsigned peelToTurnInvariantLoadsDerefencebale(Loop &L,
 //    else
 //      ..
 //   }
+//
+// The EliminateBranches parameter controls whether we require all branch
+// conditions to be fully known after a calculated number of loop iterations to
+// return a non-zero peel count.
 static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
+                                         bool EliminateBranches,
                                          ScalarEvolution &SE) {
   assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form");
   unsigned DesiredPeelCount = 0;
@@ -364,23 +373,44 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                                    BoundSCEV);
       };
 
+  // Attempts to compute the number of iterations that eliminates compares for a
+  // (potential tree of) condition(s) rooted at Condition. Returns std::nullopt
+  // when that number cannot be calculated, or a value greater than or equal to
+  // MinPeelCount when it can. For condition trees, the FullTreeRes parameter
+  // controls whether all tree leaves need to have a known peel count for the
+  // top-level call to return a non-null peel count.
   const unsigned MaxDepth = 4;
-  std::function<void(Value *, unsigned)> ComputePeelCount =
-      [&](Value *Condition, unsigned Depth) -> void {
+  std::function<std::optional<unsigned>(Value *, unsigned, bool, unsigned)>
+      ComputePeelCountCond = [&](Value *Condition, unsigned MinPeelCount,
+                                 bool FullTreeRes,
+                                 unsigned Depth) -> std::optional<unsigned> {
     if (!Condition->getType()->isIntegerTy() || Depth >= MaxDepth)
-      return;
+      return std::nullopt;
 
     Value *LeftVal, *RightVal;
     if (match(Condition, m_And(m_Value(LeftVal), m_Value(RightVal))) ||
         match(Condition, m_Or(m_Value(LeftVal), m_Value(RightVal)))) {
-      ComputePeelCount(LeftVal, Depth + 1);
-      ComputePeelCount(RightVal, Depth + 1);
-      return;
+      // When FullTreeRes is false, the returned peel count is the maximum
+      // calculated peel count over all of the condition tree's leaves, even if
+      // the peel count cannot be calculated at all for some leaves. On the
+      // contrary, when it is true the peel count of a tree that has at least
+      // one leaf whose peel count cannot be calculated is null.
+      auto RecOnLeaf = [&](Value *SubCond) -> bool {
+        std::optional<unsigned> PeelCount =
+            ComputePeelCountCond(SubCond, MinPeelCount, FullTreeRes, Depth + 1);
+        if (FullTreeRes && !PeelCount)
+          return false;
+        MinPeelCount = std::max(MinPeelCount, PeelCount.value_or(0));
+        return true;
+      };
+      if (!RecOnLeaf(LeftVal) || !RecOnLeaf(RightVal))
+        return std::nullopt;
+      return MinPeelCount;
     }
 
     CmpPredicate Pred;
     if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal))))
-      return;
+      return std::nullopt;
 
     const SCEV *LeftSCEV = SE.getSCEV(LeftVal);
     const SCEV *RightSCEV = SE.getSCEV(RightVal);
@@ -388,7 +418,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
     // Do not consider predicates that are known to be true or false
     // independently of the loop iteration.
     if (SE.evaluatePredicate(Pred, LeftSCEV, RightSCEV))
-      return;
+      return std::nullopt;
 
     // Check if we have a condition with one AddRec and one non AddRec
     // expression. Normalize LeftSCEV to be the AddRec.
@@ -396,8 +426,9 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
       if (isa<SCEVAddRecExpr>(RightSCEV)) {
         std::swap(LeftSCEV, RightSCEV);
         Pred = ICmpInst::getSwappedPredicate(Pred);
-      } else
-        return;
+      } else {
+        return std::nullopt;
+      }
     }
 
     const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV);
@@ -405,14 +436,14 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
     // Avoid huge SCEV computations in the loop below, make sure we only
     // consider AddRecs of the loop we are trying to peel.
     if (!LeftAR->isAffine() || LeftAR->getLoop() != &L)
-      return;
+      return std::nullopt;
     if (!(ICmpInst::isEquality(Pred) && LeftAR->hasNoSelfWrap()) &&
         !SE.getMonotonicPredicateType(LeftAR, Pred))
-      return;
+      return std::nullopt;
 
-    // Check if extending the current DesiredPeelCount lets us evaluate Pred
+    // Check if extending the current MinPeelCount lets us evaluate Pred
     // or !Pred in the loop body statically.
-    unsigned NewPeelCount = DesiredPeelCount;
+    unsigned NewPeelCount = MinPeelCount;
 
     const SCEV *IterVal = LeftAR->evaluateAtIteration(
         SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
@@ -426,7 +457,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
     const SCEV *Step = LeftAR->getStepRecurrence(SE);
     if (!PeelWhilePredicateIsKnown(NewPeelCount, IterVal, RightSCEV, Step,
                                    Pred))
-      return;
+      return std::nullopt;
 
     // However, for equality comparisons, that isn't always sufficient to
     // eliminate the comparsion in loop body, we may need to peel one more
@@ -437,12 +468,22 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
                              RightSCEV) &&
         !SE.isKnownPredicate(Pred, IterVal, RightSCEV) &&
         SE.isKnownPredicate(Pred, NextIterVal, RightSCEV)) {
-      if (NewPeelCount >= MaxPeelCount)
-        return; // Need to peel one more iteration, but can't. Give up.
+      if (NewPeelCount >= MaxPeelCount) {
+        // Need to peel one more iteration, but can't. Give up.
+        return std::nullopt;
+      }
       ++NewPeelCount; // Great!
     }
 
-    DesiredPeelCount = std::max(DesiredPeelCount, NewPeelCount);
+    return NewPeelCount;
+  };
+
+  auto ComputePeelCountCondWrapper = [&](Value *Condition,
+                                         bool FullTreeRes) -> bool {
+    std::optional<unsigned> PeelCount =
+        ComputePeelCountCond(Condition, DesiredPeelCount, FullTreeRes, 0);
+    DesiredPeelCount = PeelCount.value_or(DesiredPeelCount);
+    return PeelCount.has_value();
   };
 
   auto ComputePeelCountMinMax = [&](MinMaxIntrinsic *MinMax) {
@@ -488,7 +529,7 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
   for (BasicBlock *BB : L.blocks()) {
     for (Instruction &I : *BB) {
       if (SelectInst *SI = dyn_cast<SelectInst>(&I))
-        ComputePeelCount(SI->getCondition(), 0);
+        ComputePeelCountCondWrapper(SI->getCondition(), /*FullTreeRes=*/false);
       if (MinMaxIntrinsic *MinMax = dyn_cast<MinMaxIntrinsic>(&I))
         ComputePeelCountMinMax(MinMax);
     }
@@ -501,7 +542,12 @@ static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount,
     if (L.getLoopLatch() == BB)
       continue;
 
-    ComputePeelCount(BI->getCondition(), 0);
+    if (!ComputePeelCountCondWrapper(BI->getCondition(), EliminateBranches) &&
+        EliminateBranches) {
+      // We don't want to peel due to compare elimination when all branch
+      // conditions cannot be fully statically determined.
+      return 0;
+    }
   }
 
   return DesiredPeelCount;
@@ -597,8 +643,10 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
       DesiredPeelCount = std::max(DesiredPeelCount, *NumPeels);
   }
 
-  DesiredPeelCount = std::max(DesiredPeelCount,
-                              countToEliminateCompares(*L, MaxPeelCount, SE));
+  DesiredPeelCount =
+      std::max(DesiredPeelCount,
+               countToEliminateCompares(*L, MaxPeelCount,
+                                        !PP.AggresiveEliminateCompares, SE));
 
   if (DesiredPeelCount == 0)
     DesiredPeelCount = peelToTurnInvariantLoadsDerefencebale(*L, DT, AC);
@@ -879,6 +927,7 @@ llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
   PP.AllowPeeling = true;
   PP.AllowLoopNestsPeeling = false;
   PP.PeelProfiledIterations = true;
+  PP.AggresiveEliminateCompares = true;
 
   // Get the target specifc values.
   TTI.getPeelingPreferences(L, SE, PP);
@@ -891,6 +940,8 @@ llvm::gatherPeelingPreferences(Loop *L, ScalarEvolution &SE,
       PP.AllowPeeling = UnrollAllowPeeling;
     if (UnrollAllowLoopNestsPeeling.getNumOccurrences() > 0)
       PP.AllowLoopNestsPeeling = UnrollAllowLoopNestsPeeling;
+    if (UnrollAggressiveEliminateCompares.getNumOccurrences() > 0)
+      PP.AggresiveEliminateCompares = UnrollAggressiveEliminateCompares;
   }
 
   // User specifed values provided by argument.
diff --git a/llvm/test/Transforms/LoopUnroll/peel-aggressive-comp-elim.ll b/llvm/test/Transforms/LoopUnroll/peel-aggressive-comp-elim.ll
new file mode 100644
index 00000000000000..f841784f8eaf49
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/peel-aggressive-comp-elim.ll
@@ -0,0 +1,347 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=loop-unroll | FileCheck %s --check-prefix=AGGRESSIVE
+; RUN: opt < %s -S -passes=loop-unroll -unroll-allow-aggressive-eliminate-compares=false | FileCheck %s
+
+; The loop can only be peeled if aggressive compare elimination is allowed
+; because one of the condition's components depends on an unknown function
+; argument.
+
+define void @unknown_cond_prevents_peel(ptr noundef %mem, i1 noundef zeroext %cond) {
+; AGGRESSIVE-LABEL: @unknown_cond_prevents_peel(
+; AGGRESSIVE-NEXT:  entry:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL_BEGIN:%.*]]
+; AGGRESSIVE:       header.peel.begin:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL:%.*]]
+; AGGRESSIVE:       header.peel:
+; AGGRESSIVE-NEXT:    [[NOT_FIRST_IT_PEEL:%.*]] = icmp ugt i32 0, 0
+; AGGRESSIVE-NEXT:    [[IF_COND_PEEL:%.*]] = and i1 [[COND:%.*]], [[NOT_FIRST_IT_PEEL]]
+; AGGRESSIVE-NEXT:    br i1 [[IF_COND_PEEL]], label [[IF_PEEL:%.*]], label [[LATCH_EXITING_PEEL:%.*]]
+; AGGRESSIVE:       if.peel:
+; AGGRESSIVE-NEXT:    [[IV_EXT_PEEL:%.*]] = zext nneg i32 0 to i64
+; AGGRESSIVE-NEXT:    [[ADDR_PEEL:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM:%.*]], i64 [[IV_EXT_PEEL]]
+; AGGRESSIVE-NEXT:    store i32 0, ptr [[ADDR_PEEL]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING_PEEL]]
+; AGGRESSIVE:       latch_exiting.peel:
+; AGGRESSIVE-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND_PEEL:%.*]] = icmp ult i32 [[IV_NEXT_PEEL]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND_PEEL]], label [[HEADER_PEEL_NEXT:%.*]], label [[END:%.*]]
+; AGGRESSIVE:       header.peel.next:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL_NEXT1:%.*]]
+; AGGRESSIVE:       header.peel.next1:
+; AGGRESSIVE-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; AGGRESSIVE:       entry.peel.newph:
+; AGGRESSIVE-NEXT:    br label [[HEADER:%.*]]
+; AGGRESSIVE:       header:
+; AGGRESSIVE-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LATCH_EXITING:%.*]] ]
+; AGGRESSIVE-NEXT:    br i1 [[COND]], label [[IF:%.*]], label [[LATCH_EXITING]]
+; AGGRESSIVE:       if:
+; AGGRESSIVE-NEXT:    [[IV_EXT:%.*]] = zext nneg i32 [[IV]] to i64
+; AGGRESSIVE-NEXT:    [[ADDR:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT]]
+; AGGRESSIVE-NEXT:    store i32 0, ptr [[ADDR]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING]]
+; AGGRESSIVE:       latch_exiting:
+; AGGRESSIVE-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND]], label [[HEADER]], label [[END_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; AGGRESSIVE:       end.loopexit:
+; AGGRESSIVE-NEXT:    br label [[END]]
+; AGGRESSIVE:       end:
+; AGGRESSIVE-NEXT:    ret void
+;
+; CHECK-LABEL: @unknown_cond_prevents_peel(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH_EXITING:%.*]] ]
+; CHECK-NEXT:    [[NOT_FIRST_IT:%.*]] = icmp ugt i32 [[IV]], 0
+; CHECK-NEXT:    [[IF_COND:%.*]] = and i1 [[COND:%.*]], [[NOT_FIRST_IT]]
+; CHECK-NEXT:    br i1 [[IF_COND]], label [[IF:%.*]], label [[LATCH_EXITING]]
+; CHECK:       if:
+; CHECK-NEXT:    [[IV_EXT:%.*]] = zext nneg i32 [[IV]] to i64
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM:%.*]], i64 [[IV_EXT]]
+; CHECK-NEXT:    store i32 0, ptr [[ADDR]], align 4
+; CHECK-NEXT:    br label [[LATCH_EXITING]]
+; CHECK:       latch_exiting:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 64
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[HEADER]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %header
+
+header:                                                 ; preds = %entry, %latch_exiting
+  %iv = phi i32 [ 0, %entry ], [ %iv_next, %latch_exiting ]
+  %not_first_it = icmp ugt i32 %iv, 0
+  %if.cond = and i1 %cond, %not_first_it
+  br i1 %if.cond, label %if, label %latch_exiting
+
+if:                                                     ; preds = %header
+  %iv.ext = zext nneg i32 %iv to i64
+  %addr = getelementptr inbounds [16 x [16 x i32]], ptr %mem, i64 %iv.ext
+  store i32 0, ptr %addr, align 4
+  br label %latch_exiting
+
+latch_exiting:                                          ; preds = %header, %if
+  %iv_next = add nuw nsw i32 %iv, 1
+  %loop.cond = icmp ult i32 %iv_next, 64
+  br i1 %loop.cond, label %header, label %end
+
+end:                                                    ; preds = %latch_exiting
+  ret void
+}
+
+; Even in non-aggressive mode, an unknown select condition should not prevent
+; an otherwise possible peeling thanks to all branch conditions being known
+; after a number of iterations.
+
+define void @unknown_select_doesnt_prevent_peel(ptr noundef %mem, i1 noundef zeroext %cond) {
+; AGGRESSIVE-LABEL: @unknown_select_doesnt_prevent_peel(
+; AGGRESSIVE-NEXT:  entry:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL_BEGIN:%.*]]
+; AGGRESSIVE:       header.peel.begin:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL:%.*]]
+; AGGRESSIVE:       header.peel:
+; AGGRESSIVE-NEXT:    [[NOT_FIRST_IT_PEEL:%.*]] = icmp ugt i32 0, 0
+; AGGRESSIVE-NEXT:    br i1 [[NOT_FIRST_IT_PEEL]], label [[IF_PEEL:%.*]], label [[LATCH_EXITING_PEEL:%.*]]
+; AGGRESSIVE:       if.peel:
+; AGGRESSIVE-NEXT:    [[IV_EXT_PEEL:%.*]] = zext nneg i32 0 to i64
+; AGGRESSIVE-NEXT:    [[ADDR_PEEL:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM:%.*]], i64 [[IV_EXT_PEEL]]
+; AGGRESSIVE-NEXT:    [[DATA_PEEL:%.*]] = select i1 [[COND:%.*]], i32 0, i32 1
+; AGGRESSIVE-NEXT:    store i32 [[DATA_PEEL]], ptr [[ADDR_PEEL]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING_PEEL]]
+; AGGRESSIVE:       latch_exiting.peel:
+; AGGRESSIVE-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND_PEEL:%.*]] = icmp ult i32 [[IV_NEXT_PEEL]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND_PEEL]], label [[HEADER_PEEL_NEXT:%.*]], label [[END:%.*]]
+; AGGRESSIVE:       header.peel.next:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL_NEXT1:%.*]]
+; AGGRESSIVE:       header.peel.next1:
+; AGGRESSIVE-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; AGGRESSIVE:       entry.peel.newph:
+; AGGRESSIVE-NEXT:    br label [[HEADER:%.*]]
+; AGGRESSIVE:       header:
+; AGGRESSIVE-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LATCH_EXITING:%.*]] ]
+; AGGRESSIVE-NEXT:    br i1 true, label [[IF:%.*]], label [[LATCH_EXITING]]
+; AGGRESSIVE:       if:
+; AGGRESSIVE-NEXT:    [[IV_EXT:%.*]] = zext nneg i32 [[IV]] to i64
+; AGGRESSIVE-NEXT:    [[ADDR:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT]]
+; AGGRESSIVE-NEXT:    [[DATA:%.*]] = select i1 [[COND]], i32 0, i32 1
+; AGGRESSIVE-NEXT:    store i32 [[DATA]], ptr [[ADDR]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING]]
+; AGGRESSIVE:       latch_exiting:
+; AGGRESSIVE-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND]], label [[HEADER]], label [[END_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]]
+; AGGRESSIVE:       end.loopexit:
+; AGGRESSIVE-NEXT:    br label [[END]]
+; AGGRESSIVE:       end:
+; AGGRESSIVE-NEXT:    ret void
+;
+; CHECK-LABEL: @unknown_select_doesnt_prevent_peel(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[HEADER_PEEL_BEGIN:%.*]]
+; CHECK:       header.peel.begin:
+; CHECK-NEXT:    br label [[HEADER_PEEL:%.*]]
+; CHECK:       header.peel:
+; CHECK-NEXT:    [[NOT_FIRST_IT_PEEL:%.*]] = icmp ugt i32 0, 0
+; CHECK-NEXT:    br i1 [[NOT_FIRST_IT_PEEL]], label [[IF_PEEL:%.*]], label [[LATCH_EXITING_PEEL:%.*]]
+; CHECK:       if.peel:
+; CHECK-NEXT:    [[IV_EXT_PEEL:%.*]] = zext nneg i32 0 to i64
+; CHECK-NEXT:    [[ADDR_PEEL:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM:%.*]], i64 [[IV_EXT_PEEL]]
+; CHECK-NEXT:    [[DATA_PEEL:%.*]] = select i1 [[COND:%.*]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[DATA_PEEL]], ptr [[ADDR_PEEL]], align 4
+; CHECK-NEXT:    br label [[LATCH_EXITING_PEEL]]
+; CHECK:       latch_exiting.peel:
+; CHECK-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
+; CHECK-NEXT:    [[LOOP_COND_PEEL:%.*]] = icmp ult i32 [[IV_NEXT_PEEL]], 64
+; CHECK-NEXT:    br i1 [[LOOP_COND_PEEL]], label [[HEADER_PEEL_NEXT:%.*]], label [[END:%.*]]
+; CHECK:       header.peel.next:
+; CHECK-NEXT:    br label [[HEADER_PEEL_NEXT1:%.*]]
+; CHECK:       header.peel.next1:
+; CHECK-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; CHECK:       entry.peel.newph:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LATCH_EXITING:%.*]] ]
+; CHECK-NEXT:    br i1 true, label [[IF:%.*]], label [[LATCH_EXITING]]
+; CHECK:       if:
+; CHECK-NEXT:    [[IV_EXT:%.*]] = zext nneg i32 [[IV]] to i64
+; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT]]
+; CHECK-NEXT:    [[DATA:%.*]] = select i1 [[COND]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[DATA]], ptr [[ADDR]], align 4
+; CHECK-NEXT:    br label [[LATCH_EXITING]]
+; CHECK:       latch_exiting:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 64
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[HEADER]], label [[END_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       end.loopexit:
+; CHECK-NEXT:    br label [[END]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %header
+
+header:                                                 ; preds = %entry, %latch_exiting
+  %iv = phi i32 [ 0, %entry ], [ %iv_next, %latch_exiting ]
+  %not_first_it = icmp ugt i32 %iv, 0
+  br i1 %not_first_it, label %if, label %latch_exiting
+
+if:                                                     ; preds = %header
+  %iv.ext = zext nneg i32 %iv to i64
+  %addr = getelementptr inbounds [16 x [16 x i32]], ptr %mem, i64 %iv.ext
+  %data = select i1 %cond, i32 0, i32 1
+  store i32 %data, ptr %addr, align 4
+  br label %latch_exiting
+
+latch_exiting:                                          ; preds = %header, %if
+  %iv_next = add nuw nsw i32 %iv, 1
+  %loop.cond = icmp ult i32 %iv_next, 64
+  br i1 %loop.cond, label %header, label %end
+
+end:                                                    ; preds = %latch_exiting
+  ret void
+}
+
+; In aggressive mode, two iterations of the loop are peeled because if1's
+; condition is IV < 2, even though if2's condition is not fully determined.
+; In non-aggressive mode, the latter prevents any peeling.
+
+define void @mix_known_unknown_prevents_peel(ptr noundef %mem, i1 noundef zeroext %cond) {
+; AGGRESSIVE-LABEL: @mix_known_unknown_prevents_peel(
+; AGGRESSIVE-NEXT:  entry:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL_BEGIN:%.*]]
+; AGGRESSIVE:       header.peel.begin:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL:%.*]]
+; AGGRESSIVE:       header.peel:
+; AGGRESSIVE-NEXT:    [[IV_EXT_PEEL:%.*]] = zext nneg i32 0 to i64
+; AGGRESSIVE-NEXT:    [[IF1_COND_PEEL:%.*]] = icmp ult i32 0, 2
+; AGGRESSIVE-NEXT:    br i1 [[IF1_COND_PEEL]], label [[IF1_PEEL:%.*]], label [[PRED_IF2_PEEL:%.*]]
+; AGGRESSIVE:       if1.peel:
+; AGGRESSIVE-NEXT:    [[ADDR1_PEEL:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM:%.*]], i64 [[IV_EXT_PEEL]]
+; AGGRESSIVE-NEXT:    store i32 0, ptr [[ADDR1_PEEL]], align 4
+; AGGRESSIVE-NEXT:    br label [[PRED_IF2_PEEL]]
+; AGGRESSIVE:       pred_if2.peel:
+; AGGRESSIVE-NEXT:    [[NOT_FIRST_IT_PEEL:%.*]] = icmp ugt i32 0, 0
+; AGGRESSIVE-NEXT:    [[IF2_COND_PEEL:%.*]] = and i1 [[COND:%.*]], [[NOT_FIRST_IT_PEEL]]
+; AGGRESSIVE-NEXT:    br i1 [[IF2_COND_PEEL]], label [[IF2_PEEL:%.*]], label [[LATCH_EXITING_PEEL:%.*]]
+; AGGRESSIVE:       if2.peel:
+; AGGRESSIVE-NEXT:    [[ADDR2_PEEL:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT_PEEL]]
+; AGGRESSIVE-NEXT:    store i32 42, ptr [[ADDR2_PEEL]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING_PEEL]]
+; AGGRESSIVE:       latch_exiting.peel:
+; AGGRESSIVE-NEXT:    [[IV_NEXT_PEEL:%.*]] = add nuw nsw i32 0, 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND_PEEL:%.*]] = icmp ult i32 [[IV_NEXT_PEEL]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND_PEEL]], label [[HEADER_PEEL_NEXT:%.*]], label [[END:%.*]]
+; AGGRESSIVE:       header.peel.next:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL2:%.*]]
+; AGGRESSIVE:       header.peel2:
+; AGGRESSIVE-NEXT:    [[IV_EXT_PEEL3:%.*]] = zext nneg i32 [[IV_NEXT_PEEL]] to i64
+; AGGRESSIVE-NEXT:    [[IF1_COND_PEEL4:%.*]] = icmp ult i32 [[IV_NEXT_PEEL]], 2
+; AGGRESSIVE-NEXT:    br i1 [[IF1_COND_PEEL4]], label [[IF1_PEEL5:%.*]], label [[PRED_IF2_PEEL7:%.*]]
+; AGGRESSIVE:       if1.peel5:
+; AGGRESSIVE-NEXT:    [[ADDR1_PEEL6:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT_PEEL3]]
+; AGGRESSIVE-NEXT:    store i32 0, ptr [[ADDR1_PEEL6]], align 4
+; AGGRESSIVE-NEXT:    br label [[PRED_IF2_PEEL7]]
+; AGGRESSIVE:       pred_if2.peel7:
+; AGGRESSIVE-NEXT:    [[NOT_FIRST_IT_PEEL8:%.*]] = icmp ugt i32 [[IV_NEXT_PEEL]], 0
+; AGGRESSIVE-NEXT:    [[IF2_COND_PEEL9:%.*]] = and i1 [[COND]], [[NOT_FIRST_IT_PEEL8]]
+; AGGRESSIVE-NEXT:    br i1 [[IF2_COND_PEEL9]], label [[IF2_PEEL10:%.*]], label [[LATCH_EXITING_PEEL12:%.*]]
+; AGGRESSIVE:       if2.peel10:
+; AGGRESSIVE-NEXT:    [[ADDR2_PEEL11:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT_PEEL3]]
+; AGGRESSIVE-NEXT:    store i32 42, ptr [[ADDR2_PEEL11]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING_PEEL12]]
+; AGGRESSIVE:       latch_exiting.peel12:
+; AGGRESSIVE-NEXT:    [[IV_NEXT_PEEL13:%.*]] = add nuw nsw i32 [[IV_NEXT_PEEL]], 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND_PEEL14:%.*]] = icmp ult i32 [[IV_NEXT_PEEL13]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND_PEEL14]], label [[HEADER_PEEL_NEXT1:%.*]], label [[END]]
+; AGGRESSIVE:       header.peel.next1:
+; AGGRESSIVE-NEXT:    br label [[HEADER_PEEL_NEXT15:%.*]]
+; AGGRESSIVE:       header.peel.next15:
+; AGGRESSIVE-NEXT:    br label [[ENTRY_PEEL_NEWPH:%.*]]
+; AGGRESSIVE:       entry.peel.newph:
+; AGGRESSIVE-NEXT:    br label [[HEADER:%.*]]
+; AGGRESSIVE:       header:
+; AGGRESSIVE-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL13]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LATCH_EXITING:%.*]] ]
+; AGGRESSIVE-NEXT:    [[IV_EXT:%.*]] = zext nneg i32 [[IV]] to i64
+; AGGRESSIVE-NEXT:    br i1 false, label [[IF1:%.*]], label [[PRED_IF2:%.*]]
+; AGGRESSIVE:       if1:
+; AGGRESSIVE-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT]]
+; AGGRESSIVE-NEXT:    store i32 0, ptr [[ADDR1]], align 4
+; AGGRESSIVE-NEXT:    br label [[PRED_IF2]]
+; AGGRESSIVE:       pred_if2:
+; AGGRESSIVE-NEXT:    br i1 [[COND]], label [[IF2:%.*]], label [[LATCH_EXITING]]
+; AGGRESSIVE:       if2:
+; AGGRESSIVE-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT]]
+; AGGRESSIVE-NEXT:    store i32 42, ptr [[ADDR2]], align 4
+; AGGRESSIVE-NEXT:    br label [[LATCH_EXITING]]
+; AGGRESSIVE:       latch_exiting:
+; AGGRESSIVE-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; AGGRESSIVE-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 64
+; AGGRESSIVE-NEXT:    br i1 [[LOOP_COND]], label [[HEADER]], label [[END_LOOPEXIT:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; AGGRESSIVE:       end.loopexit:
+; AGGRESSIVE-NEXT:    br label [[END]]
+; AGGRESSIVE:       end:
+; AGGRESSIVE-NEXT:    ret void
+;
+; CHECK-LABEL: @mix_known_unknown_prevents_peel(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH_EXITING:%.*]] ]
+; CHECK-NEXT:    [[IV_EXT:%.*]] = zext nneg i32 [[IV]] to i64
+; CHECK-NEXT:    [[IF1_COND:%.*]] = icmp ult i32 [[IV]], 2
+; CHECK-NEXT:    br i1 [[IF1_COND]], label [[IF1:%.*]], label [[PRED_IF2:%.*]]
+; CHECK:       if1:
+; CHECK-NEXT:    [[ADDR1:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM:%.*]], i64 [[IV_EXT]]
+; CHECK-NEXT:    store i32 0, ptr [[ADDR1]], align 4
+; CHECK-NEXT:    br label [[PRED_IF2]]
+; CHECK:       pred_if2:
+; CHECK-NEXT:    [[NOT_FIRST_IT:%.*]] = icmp ugt i32 [[IV]], 0
+; CHECK-NEXT:    [[IF2_COND:%.*]] = and i1 [[COND:%.*]], [[NOT_FIRST_IT]]
+; CHECK-NEXT:    br i1 [[IF2_COND]], label [[IF2:%.*]], label [[LATCH_EXITING]]
+; CHECK:       if2:
+; CHECK-NEXT:    [[ADDR2:%.*]] = getelementptr inbounds [16 x [16 x i32]], ptr [[MEM]], i64 [[IV_EXT]]
+; CHECK-NEXT:    store i32 42, ptr [[ADDR2]], align 4
+; CHECK-NEXT:    br label [[LATCH_EXITING]]
+; CHECK:       latch_exiting:
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
+; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp ult i32 [[IV_NEXT]], 64
+; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[HEADER]], label [[END:%.*]]
+; CHECK:       end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %header
+
+header:                                                 ; preds = %entry, %latch_exiting
+  %iv = phi i32 [ 0, %entry ], [ %iv_next, %latch_exiting ]
+  %iv.ext = zext nneg i32 %iv to i64
+  %if1.cond = icmp ult i32 %iv, 2
+  br i1 %if1.cond, label %if1, label %pred_if2
+
+if1:                                                    ; preds = %header
+  %addr1 = getelementptr inbounds [16 x [16 x i32]], ptr %mem, i64 %iv.ext
+  store i32 0, ptr %addr1, align 4
+  br label %pred_if2
+
+pred_if2:                                               ; preds = %header, %if1
+  %not_first_it = icmp ugt i32 %iv, 0
+  %if2.cond = and i1 %cond, %not_first_it
+  br i1 %if2.cond, label %if2, label %latch_exiting
+
+if2:                                                    ; preds = %pred_if2
+  %addr2 = getelementptr inbounds [16 x [16 x i32]], ptr %mem, i64 %iv.ext
+  store i32 42, ptr %addr2, align 4
+  br label %latch_exiting
+
+latch_exiting:                                          ; preds = %pred_if2, %if2
+  %iv_next = add nuw nsw i32 %iv, 1
+  %loop.cond = icmp ult i32 %iv_next, 64
+  br i1 %loop.cond, label %header, label %end
+
+end:                                                    ; preds = %latch_exiting
+  ret void
+}



More information about the llvm-commits mailing list