[llvm] [CodeGenPrepare] Folding `urem` with loop invariant value as remainder (PR #96625)

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 25 10:29:29 PDT 2024


================
@@ -1974,6 +1975,165 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
   return true;
 }
 
+static bool isRemOfLoopIncrementWithLoopInvariant(
+    Value *Rem, const LoopInfo *LI, Value *&RemAmtOut,
+    std::optional<bool> &AddOrSubOut, Value *&AddOrSubOffsetOut,
+    PHINode *&LoopIncrPNOut) { // True iff Rem is `urem (IV [nuw +/- Off]), Inv`; outs written only on success.
+  Value *Incr, *RemAmt; // Dividend and divisor of the candidate urem.
+  if (!isa<Instruction>(Rem))
+    return false;
+  // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
+  if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
+    return false;
+
+  // Only trivially analyzable loops: Rem must sit in a loop that has both a
+  Loop *L = LI->getLoopFor(cast<Instruction>(Rem)->getParent());
+  if (L == nullptr || L->getLoopPreheader() == nullptr || // preheader and a
+      L->getLoopLatch() == nullptr)                       // single latch.
+    return false;
+
+  std::optional<bool> AddOrSub; // nullopt: bare PHI, true: add-like, false: sub.
+  Value *AddOrSubOffset; // Only read on the success path, where it is set.
+  // Find our loop increment PHI: either the dividend itself...
+  PHINode *PN = dyn_cast<PHINode>(Incr);
+  if (PN != nullptr) {
+    AddOrSub = std::nullopt;
+    AddOrSubOffset = nullptr;
+  } else {
+    // ...or search through one NUW add/sub on top of the loop increment.
+    Value *V0, *V1;
+    if (match(Incr, m_NUWAddLike(m_Value(V0), m_Value(V1))))
+      AddOrSub = true;
+    else if (match(Incr, m_NUWSub(m_Value(V0), m_Value(V1))))
+      AddOrSub = false;
+    else
+      return false;
+
+    PN = dyn_cast<PHINode>(V0);
+    if (PN != nullptr) {
+      AddOrSubOffset = V1;
+    } else if (*AddOrSub) { // Add commutes, so also try the RHS; sub does not.
+      PN = dyn_cast<PHINode>(V1);
+      AddOrSubOffset = V0;
+    }
+  }
+
+  if (PN == nullptr) // No PHI found on either side (or sub with non-PHI LHS).
+    return false;
+
+  // This isn't strictly necessary, what we really need is one increment and any
+  // amount of initial values all being the same.
+  if (PN->getNumIncomingValues() != 2)
+    return false;
+
+  // Only works if the remainder amount is a loop invariant of L.
+  if (!L->isLoopInvariant(RemAmt))
+    return false;
+
+  // Is the PHI a loop increment? (presumably yields {increment inst, step})
+  auto LoopIncrInfo = getIVIncrement(PN, LI);
+  if (!LoopIncrInfo.has_value())
+    return false;
+
+  // We need remainder_amount % increment_amount to be zero. Increment of one
+  // satisfies that without any special logic and is overwhelmingly the common
+  // case.
+  if (!match(LoopIncrInfo->second, m_One()))
+    return false;
+
+  // Need the increment to not overflow, i.e. it must be a `nuw` add.
+  if (!match(LoopIncrInfo->first, m_NUWAdd(m_Value(), m_Value())))
+    return false;
+
+  // PN must have incoming edges from both L's latch and L's preheader.
+  if (PN->getBasicBlockIndex(L->getLoopLatch()) < 0 ||
+      PN->getBasicBlockIndex(L->getLoopPreheader()) < 0)
+    return false;
+
+  // All checks passed: publish the results through the output parameters.
+  RemAmtOut = RemAmt;
+  LoopIncrPNOut = PN;
+  AddOrSubOut = AddOrSub;
+  AddOrSubOffsetOut = AddOrSubOffset;
+
+  return true;
+}
+
+// Try to transform:
+//
+// for(i = Start; i < End; ++i)
+//    Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
+//
+// ->
+//
+// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
+// for(i = Start; i < End; ++i, ++rem)
+//    Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
+//
+// Currently only implemented for `Start` and `IncrLoopInvariant` being zero.
+static bool foldURemOfLoopIncrement(Instruction *Rem, const LoopInfo *LI,
+                                    SmallSet<BasicBlock *, 32> &FreshBBs,
+                                    bool IsHuge) {
+  std::optional<bool> AddOrSub;
+  Value *AddOrSubOffset, *RemAmt;
+  PHINode *LoopIncrPN;
+  if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddOrSub,
+                                             AddOrSubOffset, LoopIncrPN))
+    return false;
+
+  // Only non-constant remainder as the extra IV is probably not profitable
+  // in that case. Further, since remainder amount is non-constant, only handle
+  // case where `IncrLoopInvariant` and `Start` are 0 to entirely eliminate the
+  // rem (as opposed to just hoisting it outside of the loop).
+  //
+  // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
+  // we can rule out register pressure and ensure this `urem` is executed each
+  // iteration, it's probably profitable to handle the const case as well.
+  //
+  // Potential TODO(2): Should we have a check for how "nested" this remainder
+  // operation is? The new code runs every iteration so if the remainder is
+  // guarded behind unlikely conditions this might not be worth it.
+  if (AddOrSub.has_value() || match(RemAmt, m_ImmConstant()))
----------------
nikic wrote:

Please remove all the unnecessary AddSub handling instead. Add it in a followup if you can prove usefulness.

https://github.com/llvm/llvm-project/pull/96625


More information about the llvm-commits mailing list