[llvm] [CodeGenPrepare] Folding `urem` with loop invariant value as remainder (PR #96625)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 25 11:18:59 PDT 2024
================
@@ -1974,6 +1975,165 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
return true;
}
+static bool isRemOfLoopIncrementWithLoopInvariant( // Returns true iff Rem is `urem (PHI [nuw add/sub Offset]), RemAmt` and every check below passes.
+ Value *Rem, const LoopInfo *LI, Value *&RemAmtOut, // RemAmtOut: receives the urem's second operand.
+ std::optional<bool> &AddOrSubOut, Value *&AddOrSubOffsetOut, // AddOrSubOut: true = add, false = sub, nullopt = urem applied to the PHI directly (offset is then nullptr).
+ PHINode *&LoopIncrPNOut) { // LoopIncrPNOut: receives the PHI. All four outputs are written only on the success path.
+ Value *Incr, *RemAmt;
+ if (!isa<Instruction>(Rem))
+ return false;
+ // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
+ if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
+ return false;
+
+ // Only trivially analyzable loops: the loop containing Rem's block must have
+ // both a preheader and a latch.
+ Loop *L = LI->getLoopFor(cast<Instruction>(Rem)->getParent());
+ if (L == nullptr || L->getLoopPreheader() == nullptr ||
+ L->getLoopLatch() == nullptr)
+ return false;
+
+ std::optional<bool> AddOrSub;
+ Value *AddOrSubOffset;
+ // Find out loop increment PHI: either the urem's first operand itself...
+ PHINode *PN = dyn_cast<PHINode>(Incr);
+ if (PN != nullptr) {
+ AddOrSub = std::nullopt;
+ AddOrSubOffset = nullptr;
+ } else {
+ // ...or search through a NUW add/sub on top of the loop increment.
+ Value *V0, *V1;
+ if (match(Incr, m_NUWAddLike(m_Value(V0), m_Value(V1))))
+ AddOrSub = true;
+ else if (match(Incr, m_NUWSub(m_Value(V0), m_Value(V1))))
+ AddOrSub = false;
+ else
+ return false;
+
+ PN = dyn_cast<PHINode>(V0);
+ if (PN != nullptr) {
+ AddOrSubOffset = V1;
+ } else if (*AddOrSub) { // Only the add form also tries V1 as the PHI; for sub the PHI must be operand 0.
+ PN = dyn_cast<PHINode>(V1);
+ AddOrSubOffset = V0;
+ }
+ }
+
+ if (PN == nullptr) // No PHI operand found. AddOrSubOffset may be unset on this path, but it is not read before returning.
+ return false;
+
+ // This isn't strictly necessary, what we really need is one increment and any
+ // amount of initial values all being the same.
+ if (PN->getNumIncomingValues() != 2)
+ return false;
+
+ // Only works if the remainder amount is a loop invariant (with respect to L).
+ if (!L->isLoopInvariant(RemAmt))
+ return false;
+
+ // Is the PHI a loop increment? getIVIncrement is defined outside this excerpt; presumably ->first is the increment instruction and ->second its step -- TODO confirm.
+ auto LoopIncrInfo = getIVIncrement(PN, LI);
+ if (!LoopIncrInfo.has_value())
+ return false;
+
+ // We need remainder_amount % increment_amount to be zero. Increment of one
+ // satisfies that without any special logic and is overwhelmingly the common
+ // case.
+ if (!match(LoopIncrInfo->second, m_One()))
+ return false;
+
+ // Need the increment to not overflow, i.e. it must itself be a NUW add.
+ if (!match(LoopIncrInfo->first, m_NUWAdd(m_Value(), m_Value())))
+ return false;
+
+ // Need unique loop preheader and latch: the PHI must have an incoming edge from each of L's latch and preheader.
+ if (PN->getBasicBlockIndex(L->getLoopLatch()) < 0 ||
+ PN->getBasicBlockIndex(L->getLoopPreheader()) < 0)
+ return false;
+
+ // Set output variables (reached only when every check above succeeded).
+ RemAmtOut = RemAmt;
+ LoopIncrPNOut = PN;
+ AddOrSubOut = AddOrSub;
+ AddOrSubOffsetOut = AddOrSubOffset;
+
+ return true;
+}
+
+// Try to transform:
+//
+// for(i = Start; i < End; ++i)
+// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
+//
+// ->
+//
+// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
+// for(i = Start; i < End; ++i, ++rem)
+// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
+//
+// Currently only implemented for `Start` and `IncrLoopInvariant` being zero.
+static bool foldURemOfLoopIncrement(Instruction *Rem, const LoopInfo *LI,
+ SmallSet<BasicBlock *, 32> &FreshBBs,
+ bool IsHuge) {
+ std::optional<bool> AddOrSub;
+ Value *AddOrSubOffset, *RemAmt;
+ PHINode *LoopIncrPN;
+ if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddOrSub,
+ AddOrSubOffset, LoopIncrPN))
+ return false;
+
+ // Only non-constant remainder as the extra IV is probably not profitable
+ // in that case. Further, since remainder amount is non-constant, only handle
+ // case where `IncrLoopInvariant` and `Start` are 0 to entirely eliminate the
+ // rem (as opposed to just hoisting it outside of the loop).
+ //
+ // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
+ // we can rule out register pressure and ensure this `urem` is executed each
+ // iteration, it's probably profitable to handle the const case as well.
+ //
+ // Potential TODO(2): Should we have a check for how "nested" this remainder
+ // operation is? The new code runs every iteration so if the remainder is
+ // guarded behind unlikely conditions this might not be worth it.
+ if (AddOrSub.has_value() || match(RemAmt, m_ImmConstant()))
----------------
goldsteinn wrote:
I added test cases for it + enabled it.
A common (motivating) case is:
```
for(i = 0; i < N; ++i) {
if((i + 1) % RemAmt == 0) do_occasional_thing_but_not_on_first_iterations();
// normal loop body
}
```
https://github.com/llvm/llvm-project/pull/96625
More information about the llvm-commits
mailing list