[llvm] [ValueTracking] Make the MaxAnalysisRecursionDepth overridable (PR #137721)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 14:56:54 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/137721
From 0590e5e859458225b1471a9f8699068dfa87afbb Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Apr 2025 14:37:25 -0700
Subject: [PATCH 1/2] [ValueTracking] Make the MaxAnalysisRecursionDepth
overridable
Change-Id: Id715df21631213e3ac77bf6d24a41375dab194b9
---
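Note for reviewers: getAnalysisRecursionDepthLimit() deliberately returns -1
from a function with an unsigned return type; when the hidden flag is set the
value wraps to UINT_MAX, so every Depth-based bail-out in ValueTracking is
effectively disabled. A minimal, self-contained sketch of that contract (the
cl::opt is replaced by a plain bool here purely for illustration):

    #include <cassert>
    #include <climits>

    static bool ExhaustiveRecursion = false; // stand-in for the hidden cl::opt
    static constexpr unsigned MaxAnalysisRecursionDepth = 6;

    static unsigned getAnalysisRecursionDepthLimit() {
      // unsigned(-1) wraps to UINT_MAX when the override is requested.
      return ExhaustiveRecursion ? unsigned(-1) : MaxAnalysisRecursionDepth;
    }

    int main() {
      assert(getAnalysisRecursionDepthLimit() == 6u);       // default cap
      ExhaustiveRecursion = true; // i.e. -exhaustive-analysis-recursion=true
      assert(getAnalysisRecursionDepthLimit() == UINT_MAX); // unbounded
      return 0;
    }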
llvm/include/llvm/Analysis/ValueTracking.h | 2 +
llvm/lib/Analysis/ValueTracking.cpp | 69 +++++++++-------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
.../InstCombine/InstCombineCompares.cpp | 2 +-
.../InstCombine/InstCombineMulDivRem.cpp | 2 +-
.../InstCombine/InstCombineSelect.cpp | 6 +-
.../InstCombineSimplifyDemanded.cpp | 19 +++--
.../InstCombine/InstructionCombining.cpp | 9 ++-
.../Scalar/ConstraintElimination.cpp | 9 ++-
.../InstCombine/simplifydemanded-depth.ll | 80 +++++++++++++++++++
10 files changed, 147 insertions(+), 53 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index b05b8f349b8d5..412ba32072e13 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -45,6 +45,8 @@ template <typename T> class ArrayRef;
constexpr unsigned MaxAnalysisRecursionDepth = 6;
+unsigned getAnalysisRecursionDepthLimit();
+
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 0a460786d00ea..af4b69d0b5619 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -90,6 +90,8 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+static cl::opt<bool> ExhaustiveRecursion("exhaustive-analysis-recursion",
+ cl::Hidden);
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
@@ -131,6 +133,12 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
DemandedElts, DemandedLHS, DemandedRHS);
}
+unsigned llvm::getAnalysisRecursionDepthLimit() {
+ if (!ExhaustiveRecursion.getNumOccurrences() || !ExhaustiveRecursion)
+ return MaxAnalysisRecursionDepth;
+ return -1;
+}
+
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
KnownBits &Known, const SimplifyQuery &Q,
unsigned Depth);
@@ -798,7 +806,7 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
KnownBits &Known, const SimplifyQuery &SQ,
bool Invert, unsigned Depth) {
Value *A, *B;
- if (Depth < MaxAnalysisRecursionDepth &&
+ if (Depth < getAnalysisRecursionDepthLimit() &&
match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
KnownBits Known2(Known.getBitWidth());
KnownBits Known3(Known.getBitWidth());
@@ -833,7 +841,8 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
return;
}
- if (Depth < MaxAnalysisRecursionDepth && match(Cond, m_Not(m_Value(A))))
+ if (Depth < getAnalysisRecursionDepthLimit() &&
+ match(Cond, m_Not(m_Value(A))))
computeKnownBitsFromCond(V, A, Known, SQ, !Invert, Depth + 1);
}
@@ -927,7 +936,7 @@ void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -1696,7 +1705,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
+ if (Depth < getAnalysisRecursionDepthLimit() - 1 && Known.isUnknown()) {
// Skip if every incoming value references to ourself.
if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
@@ -1725,7 +1734,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// TODO: See if we can base recursion limiter on number of incoming phi
// edges so we don't overly clamp analysis.
computeKnownBits(IncValue, DemandedElts, Known2, RecQ,
- MaxAnalysisRecursionDepth - 1);
+ getAnalysisRecursionDepthLimit() - 1);
// See if we can further use a conditional branch into the phi
// to help us determine the range of the value.
@@ -2194,7 +2203,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
}
assert(V && "No Value?");
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
#ifndef NDEBUG
Type *Ty = V->getType();
@@ -2293,7 +2302,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known = Range->toKnownBits();
// All recursive calls that increase depth must come after this.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return;
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
@@ -2406,7 +2415,7 @@ static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
/// types and vectors of integers.
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
const SimplifyQuery &Q, unsigned Depth) {
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (isa<Constant>(V))
return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
@@ -2468,7 +2477,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxAnalysisRecursionDepth)
+ if (Depth++ == getAnalysisRecursionDepthLimit())
return false;
switch (I->getOpcode()) {
@@ -2556,7 +2565,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
// Recursively check all incoming values. Limit recursion to 2 levels, so
// that search complexity is limited to number of operands^2.
- unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+ unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
// Value is power of 2 if it is coming from PHI node itself by induction.
if (U.get() == PN)
@@ -2660,7 +2669,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, const SimplifyQuery &Q,
// to recurse 10k times just because we have 10k GEP operands. We don't
// bail completely out because we want to handle constant GEPs regardless
// of depth.
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
continue;
if (isKnownNonZero(GTI.getOperand(), Q, Depth))
@@ -3164,7 +3173,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// Check if all incoming values are non-zero using recursion.
SimplifyQuery RecQ = Q.getWithoutCondContext();
- unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+ unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
return true;
@@ -3430,7 +3439,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -3493,7 +3502,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
return true;
// Some of the tests below are recursive, so bail out if we hit the limit.
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
return false;
// Check for pointer simplifications.
@@ -3877,7 +3886,7 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2,
// We can't look through casts yet.
return false;
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return false;
// See if we can recurse through (exactly one of) our operands. This
@@ -3994,7 +4003,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
const SimplifyQuery &Q, unsigned Depth) {
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -4021,7 +4030,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return 1;
if (auto *U = dyn_cast<Operator>(V)) {
@@ -4555,7 +4564,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
KnownFPClass &KnownFromContext,
unsigned Depth = 0) {
Value *A, *B;
- if (Depth < MaxAnalysisRecursionDepth &&
+ if (Depth < getAnalysisRecursionDepthLimit() &&
(CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
: match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
computeKnownFPClassFromCond(V, A, CondIsTrue, CxtI, KnownFromContext,
@@ -4564,7 +4573,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
Depth + 1);
return;
}
- if (Depth < MaxAnalysisRecursionDepth && match(Cond, m_Not(m_Value(A)))) {
+ if (Depth < getAnalysisRecursionDepthLimit() && match(Cond, m_Not(m_Value(A)))) {
computeKnownFPClassFromCond(V, A, !CondIsTrue, CxtI, KnownFromContext,
Depth + 1);
return;
@@ -4696,7 +4705,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (auto *CFP = dyn_cast<ConstantFP>(V)) {
Known.KnownFPClasses = CFP->getValueAPF().classify();
@@ -4790,7 +4799,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
// All recursive calls that increase depth must come after this.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return;
const unsigned Opc = Op->getOpcode();
@@ -5744,7 +5753,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
+ const unsigned PhiRecursionLimit = getAnalysisRecursionDepthLimit() - 2;
if (Depth < PhiRecursionLimit) {
// Skip if every incoming value references to ourself.
@@ -7559,7 +7568,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
static bool isGuaranteedNotToBeUndefOrPoison(
const Value *V, AssumptionCache *AC, const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return false;
if (isa<MetadataAsValue>(V))
@@ -8895,7 +8904,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp,
unsigned Depth) {
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return {SPF_UNKNOWN, SPNB_NA, false};
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -9314,10 +9323,10 @@ isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
// C1` (see discussion: D58633).
ConstantRange LCR = computeConstantRange(
L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
ConstantRange RCR = computeConstantRange(
R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
// Even if L1/R1 are not both constant, we can still sometimes deduce
// relationship from a single constant. For example X u> Y implies X != 0.
if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
@@ -9382,7 +9391,7 @@ isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
LHS->getOpcode() == Instruction::Select) &&
"Expected LHS to be 'and', 'or', or 'select'.");
- assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Hit recursion limit");
// If the result of an 'or' is false, then we know both legs of the 'or' are
// false. Similarly, if the result of an 'and' is true, then we know both
@@ -9407,7 +9416,7 @@ llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
const Value *RHSOp0, const Value *RHSOp1,
const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
// Bail out when we hit the limit.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return std::nullopt;
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for
@@ -9478,7 +9487,7 @@ std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
return std::nullopt;
}
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return std::nullopt;
// LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
@@ -9940,7 +9949,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
if (auto *C = dyn_cast<Constant>(V))
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 64af7a57e8d12..b9a1b72001ded 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1918,7 +1918,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
const MachineRegisterInfo &MRI,
unsigned Depth,
UndefPoisonKind Kind) {
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return false;
MachineInstr *RegDef = MRI.getVRegDef(Reg);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c112fae351817..ffda554c84e83 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4390,7 +4390,7 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
return true;
if (V->getType()->getScalarSizeInBits() == 1)
return true;
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
return false;
Value *X;
const Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 457199a72510e..6fe80e01eec7d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1533,7 +1533,7 @@ Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
});
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxAnalysisRecursionDepth)
+ if (Depth++ == getAnalysisRecursionDepthLimit())
return nullptr;
// log2(zext X) -> zext log2(X)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d7d0431a5b8d0..0bf05f975ffaa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3641,7 +3641,7 @@ static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
/// select condition.
static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
unsigned Depth) {
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return false;
// Ignore the case where the select arm itself is affected. These cases
@@ -3651,9 +3651,9 @@ static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
if (auto *I = dyn_cast<Instruction>(V)) {
if (isa<PHINode>(I)) {
- if (Depth == MaxAnalysisRecursionDepth - 1)
+ if (Depth == getAnalysisRecursionDepthLimit() - 1)
return false;
- Depth = MaxAnalysisRecursionDepth - 2;
+ Depth = getAnalysisRecursionDepthLimit() - 2;
}
return any_of(I->operands(), [&](Value *Op) {
return Op->getType()->isIntOrIntVectorTy() &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0e3436d12702d..595fa2c5c7e26 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -117,7 +117,7 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
return false;
}
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return false;
Value *NewVal;
@@ -167,7 +167,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
const SimplifyQuery &Q,
unsigned Depth) {
assert(I != nullptr && "Null pointer of Value???");
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = I->getType();
assert(
@@ -1451,7 +1451,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
// Limit search depth.
- if (Depth == SimplifyDemandedVectorEltsDepthLimit)
+ if (Depth == SimplifyDemandedVectorEltsDepthLimit &&
+ Depth >= getAnalysisRecursionDepthLimit())
return nullptr;
if (!AllowMultipleUsers) {
@@ -1965,12 +1966,10 @@ static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
}
}
-Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
- FPClassTest DemandedMask,
- KnownFPClass &Known,
- Instruction *CxtI,
- unsigned Depth) {
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
+ Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
+ Instruction *CxtI, unsigned Depth) {
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
Type *VTy = V->getType();
assert(Known == KnownFPClass() && "expected uninitialized state");
@@ -1978,7 +1977,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
if (DemandedMask == fcNone)
return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 439a86d951a83..1a245631438dc 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2594,7 +2594,7 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
if (match(V, m_ImmConstant(C)))
return ConstantExpr::getNot(C);
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
return nullptr;
// The rest of the cases require that we invert all uses so don't bother
@@ -2686,9 +2686,10 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
for (Use &U : PN->operands()) {
BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
- Value *NewIncomingVal = getFreelyInvertedImpl(
- U.get(), /*WillInvertAllUses=*/false,
- /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
+ Value *NewIncomingVal =
+ getFreelyInvertedImpl(U.get(), /*WillInvertAllUses=*/false,
+ /*Builder=*/nullptr, LocalDoesConsume,
+ getAnalysisRecursionDepthLimit() - 1);
if (NewIncomingVal == nullptr)
return nullptr;
// Make sure that we can safely erase the original PHI node.
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index cbad5dd357687..b57b82a97d743 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -822,8 +822,10 @@ ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
// unsigned ones. This increases the reasoning effectiveness in combination
// with the signed <-> unsigned transfer logic.
if (CmpInst::isSigned(Pred) &&
- isKnownNonNegative(Op0, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1) &&
- isKnownNonNegative(Op1, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1))
+ isKnownNonNegative(Op0, DL,
+ /*Depth=*/getAnalysisRecursionDepthLimit() - 1) &&
+ isKnownNonNegative(Op1, DL,
+ /*Depth=*/getAnalysisRecursionDepthLimit() - 1))
Pred = ICmpInst::getUnsignedPredicate(Pred);
SmallVector<Value *> NewVariables;
@@ -896,7 +898,8 @@ void ConstraintInfo::transferToOtherSystem(
unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack) {
auto IsKnownNonNegative = [this](Value *V) {
return doesHold(CmpInst::ICMP_SGE, V, ConstantInt::get(V->getType(), 0)) ||
- isKnownNonNegative(V, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1);
+ isKnownNonNegative(V, DL,
+ /*Depth=*/getAnalysisRecursionDepthLimit() - 1);
};
// Check if we can combine facts from the signed and unsigned systems to
// derive additional facts.
diff --git a/llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll b/llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll
new file mode 100644
index 0000000000000..228877879a31b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=instcombine | FileCheck -check-prefixes=DEFAULT %s
+; RUN: opt -S < %s -passes=instcombine -exhaustive-analysis-recursion=true | FileCheck -check-prefixes=EXHAUSTIVE %s
+
+declare i32 @callee()
+
+define i32 @test5(i1 %C) {
+; DEFAULT-LABEL: @test5(
+; DEFAULT-NEXT: [[VALUE:%.*]] = tail call i32 @callee(), !range [[RNG0:![0-9]+]]
+; DEFAULT-NEXT: [[VALUE_MASK0:%.*]] = and i32 [[VALUE]], 8
+; DEFAULT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[VALUE_MASK0]], 0
+; DEFAULT-NEXT: [[VALUE_MASK1:%.*]] = and i32 [[VALUE]], 16
+; DEFAULT-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VALUE_MASK1]], 0
+; DEFAULT-NEXT: [[VALUE_MASK2:%.*]] = and i32 [[VALUE]], 32
+; DEFAULT-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VALUE_MASK2]], 0
+; DEFAULT-NEXT: [[VALUE_SHIFT0:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; DEFAULT-NEXT: [[VALUE_MASK4:%.*]] = and i32 [[VALUE_SHIFT0]], 56
+; DEFAULT-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; DEFAULT-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; DEFAULT-NEXT: [[COMBINED_01:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; DEFAULT-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; DEFAULT-NEXT: [[COMBINED_012:%.*]] = or disjoint i32 [[COMBINED_01]], [[SEL2]]
+; DEFAULT-NEXT: [[COMBINED_0123:%.*]] = xor i32 [[COMBINED_012]], [[VALUE_MASK4]]
+; DEFAULT-NEXT: [[VALUE_SHIFT1:%.*]] = and i32 [[VALUE_SHIFT0]], 1536
+; DEFAULT-NEXT: [[OP0:%.*]] = or disjoint i32 [[VALUE_SHIFT1]], [[COMBINED_0123]]
+; DEFAULT-NEXT: [[VALUE_MASK3:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; DEFAULT-NEXT: [[OP1:%.*]] = and i32 [[VALUE_MASK3]], 2048
+; DEFAULT-NEXT: [[BASE:%.*]] = or disjoint i32 [[OP0]], [[OP1]]
+; DEFAULT-NEXT: [[XOR:%.*]] = xor i32 [[BASE]], 4096
+; DEFAULT-NEXT: ret i32 [[XOR]]
+;
+; EXHAUSTIVE-LABEL: @test5(
+; EXHAUSTIVE-NEXT: [[VALUE:%.*]] = tail call i32 @callee(), !range [[RNG0:![0-9]+]]
+; EXHAUSTIVE-NEXT: [[VALUE_MASK0:%.*]] = and i32 [[VALUE]], 8
+; EXHAUSTIVE-NEXT: [[CMP0:%.*]] = icmp eq i32 [[VALUE_MASK0]], 0
+; EXHAUSTIVE-NEXT: [[VALUE_MASK1:%.*]] = and i32 [[VALUE]], 16
+; EXHAUSTIVE-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VALUE_MASK1]], 0
+; EXHAUSTIVE-NEXT: [[VALUE_MASK2:%.*]] = and i32 [[VALUE]], 32
+; EXHAUSTIVE-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VALUE_MASK2]], 0
+; EXHAUSTIVE-NEXT: [[VALUE_SHIFT0:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; EXHAUSTIVE-NEXT: [[VALUE_MASK4:%.*]] = and i32 [[VALUE_SHIFT0]], 56
+; EXHAUSTIVE-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; EXHAUSTIVE-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; EXHAUSTIVE-NEXT: [[COMBINED_01:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; EXHAUSTIVE-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; EXHAUSTIVE-NEXT: [[COMBINED_012:%.*]] = or disjoint i32 [[COMBINED_01]], [[SEL2]]
+; EXHAUSTIVE-NEXT: [[COMBINED_0123:%.*]] = xor i32 [[COMBINED_012]], [[VALUE_MASK4]]
+; EXHAUSTIVE-NEXT: [[VALUE_SHIFT1:%.*]] = and i32 [[VALUE_SHIFT0]], 1536
+; EXHAUSTIVE-NEXT: [[OP0:%.*]] = or disjoint i32 [[VALUE_SHIFT1]], [[COMBINED_0123]]
+; EXHAUSTIVE-NEXT: [[VALUE_MASK3:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; EXHAUSTIVE-NEXT: [[OP1:%.*]] = and i32 [[VALUE_MASK3]], 2048
+; EXHAUSTIVE-NEXT: [[BASE:%.*]] = or disjoint i32 [[OP0]], [[OP1]]
+; EXHAUSTIVE-NEXT: [[XOR:%.*]] = or disjoint i32 [[BASE]], 4096
+; EXHAUSTIVE-NEXT: ret i32 [[XOR]]
+;
+ %value = tail call i32 @callee(), !range !0
+ %value_mask0 = and i32 %value, 8
+ %cmp0 = icmp eq i32 %value_mask0, 0
+ %value_mask1 = and i32 %value, 16
+ %cmp1 = icmp eq i32 %value_mask1, 0
+ %value_mask2 = and i32 %value, 32
+ %cmp2 = icmp eq i32 %value_mask2, 0
+ %value_mask3 = and i32 %value, 256
+ %value_shift0 = shl i32 %value, 3
+ %value_mask4 = and i32 %value_shift0, 56
+ %sel0 = select i1 %cmp0, i32 0, i32 72 ; lane_bit3 * 73
+ %sel1 = select i1 %cmp1, i32 0, i32 144 ; lane_bit4 * 144
+ %combined_01 = or disjoint i32 %sel0, %sel1 ;
+ %sel2 = select i1 %cmp2, i32 0, i32 288 ; lane_bit5 * 288
+ %combined_012 = or disjoint i32 %combined_01, %sel2
+ %combined_0123 = xor i32 %combined_012, %value_mask4
+ %value_shift1 = and i32 %value_shift0, 1536
+ %op0 = or disjoint i32 %value_shift1, %combined_0123
+ %op1 = shl nuw nsw i32 %value_mask3, 3
+ %base = or disjoint i32 %op0, %op1
+ %xor = xor i32 %base, 4096
+ ret i32 %xor
+}
+
+!0 = !{ i32 0, i32 2048 }
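Note on the test: the !range metadata bounds %value to [0, 2048), so its bits
11 and up are known zero, and %base is assembled only from bits 3..11; bit 12
of %base is therefore provably clear, which makes `xor i32 %base, 4096`
equivalent to `or disjoint i32 %base, 4096` (the EXHAUSTIVE check line above).
The default cap of 6 makes the known-bits walk give up before it reaches the
!range fact. A self-contained check of the disjointness argument, using the
same KnownBits helper the second patch calls:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // %base: only bit 12 has to be known zero for the rewrite.
      KnownBits Base(32);
      Base.Zero.setBit(12);
      // The xor constant: 4096 == 1 << 12.
      KnownBits C = KnownBits::makeConstant(APInt(32, 4096));
      // No bit can be set in both operands, so for these two values
      // xor, or-disjoint and add all compute the same result.
      assert(KnownBits::haveNoCommonBitsSet(Base, C));
      return 0;
    }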
From 489fc1ccde12963855b20e1c0f5fefbaf232fd98 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 28 May 2025 17:07:04 -0700
Subject: [PATCH 2/2] computeKnownBitsExhaustive + usecase + infinite recursion
tracking
Change-Id: Icf0dac8c87812aa8edff53a1d5cdd664ab6d6d12
---
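Note for reviewers: the DepthLimit singleton lifts the depth cap only on the
first visit of a given (query kind, value) pair; a revisit returns the
caller's current Depth, so the caller's `Depth >= limit` (or `Depth == limit`)
test fires immediately and cyclic value graphs terminate instead of recursing
forever. A standalone mock of that rule (the names here are illustrative, not
the patch's API):

    #include <cassert>
    #include <set>
    #include <utility>

    struct MockDepthLimit {
      bool Override = false; // toggled around computeKnownBitsExhaustive()
      std::set<std::pair<int, const void *>> Visited; // (query kind, value)

      unsigned maxDepth(int Kind, const void *V, unsigned Depth) {
        if (!Override)
          return 6; // MaxAnalysisRecursionDepth
        if (Visited.emplace(Kind, V).second)
          return unsigned(-1); // first visit: unbounded budget
        return Depth;          // revisit: forces the Depth >= limit bail-out
      }
    };

    int main() {
      MockDepthLimit DL;
      int V = 0;
      DL.Override = true;
      assert(DL.maxDepth(0, &V, 9) == unsigned(-1)); // first visit
      assert(DL.maxDepth(0, &V, 9) == 9);            // revisit: walk stops
      return 0;
    }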
llvm/include/llvm/Analysis/ValueTracking.h | 66 ++++++++++++-
llvm/lib/Analysis/ValueTracking.cpp | 93 +++++++++++--------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
.../InstCombine/InstCombineAndOrXor.cpp | 8 +-
.../InstCombine/InstCombineCompares.cpp | 2 +-
.../InstCombine/InstCombineMulDivRem.cpp | 2 +-
.../InstCombine/InstCombineSelect.cpp | 6 +-
.../InstCombineSimplifyDemanded.cpp | 18 ++--
.../InstCombine/InstructionCombining.cpp | 4 +-
.../Scalar/ConstraintElimination.cpp | 6 +-
.../Scalar/SeparateConstOffsetFromGEP.cpp | 23 ++++-
.../AMDGPU/xor-or-disjoint.ll | 41 ++++++++
12 files changed, 206 insertions(+), 65 deletions(-)
create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 412ba32072e13..e2c4c5112224a 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -45,7 +45,64 @@ template <typename T> class ArrayRef;
constexpr unsigned MaxAnalysisRecursionDepth = 6;
-unsigned getAnalysisRecursionDepthLimit();
+class DepthLimit {
+public:
+ static DepthLimit &get() {
+ static DepthLimit Instance;
+ return Instance;
+ }
+
+ enum class VTCycle {
+ KNOWNBIT = 0,
+ KNOWNBITCOND = 1,
+ NONZERO = 2,
+ NONEQUAL = 3,
+ IMPLIED = 4,
+ FPCLASS = 5,
+ RANGE = 6,
+ SIGNBITS = 7,
+ NOTUNDEFPOISON = 8,
+ NONE = 9
+ };
+
+ static unsigned getMaxRecursionDepth(VTCycle Cycle, const Value *I,
+ unsigned Depth) {
+ if (!get().RecursionDepthOverride || Cycle == VTCycle::NONE)
+ return get().getMaxRecursionDepthImpl();
+
+ if (get().Encountered[Cycle].insert(I).second)
+ return get().getMaxRecursionDepthImpl();
+
+ return Depth;
+ }
+ static unsigned getMaxRecursionDepth() {
+ return get().getMaxRecursionDepthImpl();
+ }
+ static void setOverrideDepthLimit() { get().setOverrideDepthLimitImpl(); }
+ static void resetOverrideDepthLimit() { get().resetOverrideDepthLimitImpl(); }
+
+ DepthLimit(const DepthLimit &) = delete;
+ DepthLimit &operator=(const DepthLimit &) = delete;
+
+private:
+ DepthLimit() {}
+
+  const unsigned MaxAnalysisRecursionDepth = 6;
+ bool RecursionDepthOverride = false;
+
+ DenseMap<VTCycle, SmallPtrSet<const Value *, 8>> Encountered;
+
+ unsigned getMaxRecursionDepthImpl() {
+    return RecursionDepthOverride ? -1 : MaxAnalysisRecursionDepth;
+ }
+
+ void setOverrideDepthLimitImpl() { RecursionDepthOverride = true; }
+
+ void resetOverrideDepthLimitImpl() {
+ RecursionDepthOverride = false;
+ Encountered.clear();
+ }
+};
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
@@ -88,6 +145,13 @@ LLVM_ABI KnownBits computeKnownBits(const Value *V, const SimplifyQuery &Q,
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known,
const SimplifyQuery &Q, unsigned Depth = 0);
+void computeKnownBitsExhaustive(const Value *V, KnownBits &Known,
+ const DataLayout &DL,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
+
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
/// \p KnownOne the set of bits that are known to be one
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index af4b69d0b5619..bf8ca3766eecb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -90,9 +90,6 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
-static cl::opt<bool> ExhaustiveRecursion("exhaustive-analysis-recursion",
- cl::Hidden);
-
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -133,12 +130,6 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
DemandedElts, DemandedLHS, DemandedRHS);
}
-unsigned llvm::getAnalysisRecursionDepthLimit() {
- if (!ExhaustiveRecursion.getNumOccurrences() || !ExhaustiveRecursion)
- return MaxAnalysisRecursionDepth;
- return -1;
-}
-
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
KnownBits &Known, const SimplifyQuery &Q,
unsigned Depth);
@@ -163,6 +154,16 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known,
Depth);
}
+void llvm::computeKnownBitsExhaustive(const Value *V, KnownBits &Known,
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT,
+ bool UseInstrInfo) {
+ DepthLimit::setOverrideDepthLimit();
+ computeKnownBits(V, Known, DL, AC, CxtI, DT, UseInstrInfo, /*Depth=*/0);
+ DepthLimit::resetOverrideDepthLimit();
+}
+
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo,
@@ -806,7 +807,8 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
KnownBits &Known, const SimplifyQuery &SQ,
bool Invert, unsigned Depth) {
Value *A, *B;
- if (Depth < getAnalysisRecursionDepthLimit() &&
+ if (Depth < DepthLimit::getMaxRecursionDepth(
+ DepthLimit::VTCycle::KNOWNBITCOND, V, Depth) &&
match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
KnownBits Known2(Known.getBitWidth());
KnownBits Known3(Known.getBitWidth());
@@ -841,7 +843,7 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
return;
}
- if (Depth < getAnalysisRecursionDepthLimit() &&
+ if (Depth < DepthLimit::getMaxRecursionDepth() &&
match(Cond, m_Not(m_Value(A))))
computeKnownBitsFromCond(V, A, Known, SQ, !Invert, Depth + 1);
}
@@ -936,7 +938,7 @@ void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -1705,7 +1707,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < getAnalysisRecursionDepthLimit() - 1 && Known.isUnknown()) {
+ if (Depth < DepthLimit::getMaxRecursionDepth() - 1 && Known.isUnknown()) {
// Skip if every incoming value references to ourself.
if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
@@ -1734,7 +1736,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// TODO: See if we can base recursion limiter on number of incoming phi
// edges so we don't overly clamp analysis.
computeKnownBits(IncValue, DemandedElts, Known2, RecQ,
- getAnalysisRecursionDepthLimit() - 1);
+ DepthLimit::getMaxRecursionDepth() - 1);
// See if we can further use a conditional branch into the phi
// to help us determine the range of the value.
@@ -2203,7 +2205,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
}
assert(V && "No Value?");
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
#ifndef NDEBUG
Type *Ty = V->getType();
@@ -2302,7 +2304,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known = Range->toKnownBits();
// All recursive calls that increase depth must come after this.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::KNOWNBIT, V, Depth))
return;
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
@@ -2415,7 +2418,7 @@ static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
/// types and vectors of integers.
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
const SimplifyQuery &Q, unsigned Depth) {
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (isa<Constant>(V))
return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
@@ -2477,7 +2480,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == getAnalysisRecursionDepthLimit())
+ if (Depth++ == DepthLimit::getMaxRecursionDepth())
return false;
switch (I->getOpcode()) {
@@ -2565,7 +2568,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
// Recursively check all incoming values. Limit recursion to 2 levels, so
// that search complexity is limited to number of operands^2.
- unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
+ unsigned NewDepth = std::max(Depth, DepthLimit::getMaxRecursionDepth() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
// Value is power of 2 if it is coming from PHI node itself by induction.
if (U.get() == PN)
@@ -2669,7 +2672,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, const SimplifyQuery &Q,
// to recurse 10k times just because we have 10k GEP operands. We don't
// bail completely out because we want to handle constant GEPs regardless
// of depth.
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth++ >= DepthLimit::getMaxRecursionDepth())
continue;
if (isKnownNonZero(GTI.getOperand(), Q, Depth))
@@ -3173,7 +3176,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// Check if all incoming values are non-zero using recursion.
SimplifyQuery RecQ = Q.getWithoutCondContext();
- unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
+ unsigned NewDepth = std::max(Depth, DepthLimit::getMaxRecursionDepth() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
return true;
@@ -3439,7 +3442,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -3502,9 +3505,11 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
return true;
// Some of the tests below are recursive, so bail out if we hit the limit.
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth >=
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::NONZERO, V, Depth))
return false;
+ ++Depth;
// Check for pointer simplifications.
if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
@@ -3886,7 +3891,8 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2,
// We can't look through casts yet.
return false;
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::NONEQUAL,
+ V1, Depth))
return false;
// See if we can recurse through (exactly one of) our operands. This
@@ -4003,7 +4009,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
const SimplifyQuery &Q, unsigned Depth) {
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -4030,7 +4036,8 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::SIGNBITS, V, Depth))
return 1;
if (auto *U = dyn_cast<Operator>(V)) {
@@ -4564,7 +4571,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
KnownFPClass &KnownFromContext,
unsigned Depth = 0) {
Value *A, *B;
- if (Depth < getAnalysisRecursionDepthLimit() &&
+ if (Depth < DepthLimit::getMaxRecursionDepth() &&
(CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
: match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
computeKnownFPClassFromCond(V, A, CondIsTrue, CxtI, KnownFromContext,
@@ -4573,7 +4580,8 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
Depth + 1);
return;
}
- if (Depth < getAnalysisRecursionDepthLimit() && match(Cond, m_Not(m_Value(A)))) {
+ if (Depth < DepthLimit::getMaxRecursionDepth() &&
+ match(Cond, m_Not(m_Value(A)))) {
computeKnownFPClassFromCond(V, A, !CondIsTrue, CxtI, KnownFromContext,
Depth + 1);
return;
@@ -4705,7 +4713,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (auto *CFP = dyn_cast<ConstantFP>(V)) {
Known.KnownFPClasses = CFP->getValueAPF().classify();
@@ -4799,7 +4807,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
// All recursive calls that increase depth must come after this.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::FPCLASS, Op, Depth))
return;
const unsigned Opc = Op->getOpcode();
@@ -5753,7 +5762,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- const unsigned PhiRecursionLimit = getAnalysisRecursionDepthLimit() - 2;
+ const unsigned PhiRecursionLimit = DepthLimit::getMaxRecursionDepth() - 2;
if (Depth < PhiRecursionLimit) {
// Skip if every incoming value references to ourself.
@@ -7568,7 +7577,8 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
static bool isGuaranteedNotToBeUndefOrPoison(
const Value *V, AssumptionCache *AC, const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth(
+ DepthLimit::VTCycle::NOTUNDEFPOISON, V, Depth))
return false;
if (isa<MetadataAsValue>(V))
@@ -8904,7 +8914,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp,
unsigned Depth) {
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth())
return {SPF_UNKNOWN, SPNB_NA, false};
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -9323,10 +9333,12 @@ isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
// C1` (see discussion: D58633).
ConstantRange LCR = computeConstantRange(
L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr,
+ DepthLimit::getMaxRecursionDepth() - 1);
ConstantRange RCR = computeConstantRange(
R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr,
+ DepthLimit::getMaxRecursionDepth() - 1);
// Even if L1/R1 are not both constant, we can still sometimes deduce
// relationship from a single constant. For example X u> Y implies X != 0.
if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
@@ -9391,7 +9403,7 @@ isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
LHS->getOpcode() == Instruction::Select) &&
"Expected LHS to be 'and', 'or', or 'select'.");
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Hit recursion limit");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Hit recursion limit");
// If the result of an 'or' is false, then we know both legs of the 'or' are
// false. Similarly, if the result of an 'and' is true, then we know both
@@ -9416,7 +9428,8 @@ llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
const Value *RHSOp0, const Value *RHSOp1,
const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
// Bail out when we hit the limit.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::IMPLIED,
+ LHS, Depth))
return std::nullopt;
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for
@@ -9487,7 +9500,8 @@ std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
return std::nullopt;
}
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::IMPLIED,
+ LHS, Depth))
return std::nullopt;
// LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
@@ -9949,7 +9963,8 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::RANGE, V, Depth))
return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
if (auto *C = dyn_cast<Constant>(V))
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index b9a1b72001ded..a931aa0f800e2 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1918,7 +1918,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
const MachineRegisterInfo &MRI,
unsigned Depth,
UndefPoisonKind Kind) {
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth())
return false;
MachineInstr *RegDef = MRI.getVRegDef(Reg);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2fb4bfecda8aa..0e85ff7bcd19f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -5082,10 +5082,10 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *Abs = canonicalizeAbs(I, Builder))
return Abs;
- // Otherwise, if all else failed, try to hoist the xor-by-constant:
- // (X ^ C) ^ Y --> (X ^ Y) ^ C
- // Just like we do in other places, we completely avoid the fold
- // for constantexprs, at least to avoid endless combine loop.
+ // Otherwise, if all else failed, try to hoist the xor-by-constant:
+ // (X ^ C) ^ Y --> (X ^ Y) ^ C
+ // Just like we do in other places, we completely avoid the fold
+ // for constantexprs, at least to avoid endless combine loop.
if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
m_Unless(m_ConstantExpr())),
m_ImmConstant(C1))),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ffda554c84e83..cc6c0cd6e7e01 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4390,7 +4390,7 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
return true;
if (V->getType()->getScalarSizeInBits() == 1)
return true;
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth++ >= DepthLimit::getMaxRecursionDepth())
return false;
Value *X;
const Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6fe80e01eec7d..4ea7ecb335dee 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1533,7 +1533,7 @@ Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
});
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == getAnalysisRecursionDepthLimit())
+ if (Depth++ == DepthLimit::getMaxRecursionDepth())
return nullptr;
// log2(zext X) -> zext log2(X)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 0bf05f975ffaa..1ec8fd8dc9f77 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3641,7 +3641,7 @@ static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
/// select condition.
static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
unsigned Depth) {
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
return false;
// Ignore the case where the select arm itself is affected. These cases
@@ -3651,9 +3651,9 @@ static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
if (auto *I = dyn_cast<Instruction>(V)) {
if (isa<PHINode>(I)) {
- if (Depth == getAnalysisRecursionDepthLimit() - 1)
+ if (Depth == DepthLimit::getMaxRecursionDepth() - 1)
return false;
- Depth = getAnalysisRecursionDepthLimit() - 2;
+ Depth = DepthLimit::getMaxRecursionDepth() - 2;
}
return any_of(I->operands(), [&](Value *Op) {
return Op->getType()->isIntOrIntVectorTy() &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 595fa2c5c7e26..46ac778d68d5f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -117,7 +117,7 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
return false;
}
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
return false;
Value *NewVal;
@@ -167,7 +167,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
const SimplifyQuery &Q,
unsigned Depth) {
assert(I != nullptr && "Null pointer of Value???");
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = I->getType();
assert(
@@ -1452,7 +1452,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Limit search depth.
if (Depth == SimplifyDemandedVectorEltsDepthLimit &&
- Depth >= getAnalysisRecursionDepthLimit())
+ Depth >= DepthLimit::getMaxRecursionDepth())
return nullptr;
if (!AllowMultipleUsers) {
@@ -1966,10 +1966,12 @@ static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
}
}
-Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
- Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
- Instruction *CxtI, unsigned Depth) {
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
+ FPClassTest DemandedMask,
+ KnownFPClass &Known,
+ Instruction *CxtI,
+ unsigned Depth) {
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
Type *VTy = V->getType();
assert(Known == KnownFPClass() && "expected uninitialized state");
@@ -1977,7 +1979,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
if (DemandedMask == fcNone)
return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1a245631438dc..60e5d5fb0c3d7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2594,7 +2594,7 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
if (match(V, m_ImmConstant(C)))
return ConstantExpr::getNot(C);
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth++ >= DepthLimit::getMaxRecursionDepth())
return nullptr;
// The rest of the cases require that we invert all uses so don't bother
@@ -2689,7 +2689,7 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
Value *NewIncomingVal =
getFreelyInvertedImpl(U.get(), /*WillInvertAllUses=*/false,
/*Builder=*/nullptr, LocalDoesConsume,
- getAnalysisRecursionDepthLimit() - 1);
+ DepthLimit::getMaxRecursionDepth() - 1);
if (NewIncomingVal == nullptr)
return nullptr;
// Make sure that we can safely erase the original PHI node.
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index b57b82a97d743..549d4cf0f426f 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -823,9 +823,9 @@ ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
// with the signed <-> unsigned transfer logic.
if (CmpInst::isSigned(Pred) &&
isKnownNonNegative(Op0, DL,
- /*Depth=*/getAnalysisRecursionDepthLimit() - 1) &&
+ /*Depth=*/DepthLimit::getMaxRecursionDepth() - 1) &&
isKnownNonNegative(Op1, DL,
- /*Depth=*/getAnalysisRecursionDepthLimit() - 1))
+ /*Depth=*/DepthLimit::getMaxRecursionDepth() - 1))
Pred = ICmpInst::getUnsignedPredicate(Pred);
SmallVector<Value *> NewVariables;
@@ -899,7 +899,7 @@ void ConstraintInfo::transferToOtherSystem(
auto IsKnownNonNegative = [this](Value *V) {
return doesHold(CmpInst::ICMP_SGE, V, ConstantInt::get(V->getType(), 0)) ||
isKnownNonNegative(V, DL,
- /*Depth=*/getAnalysisRecursionDepthLimit() - 1);
+ /*Depth=*/DepthLimit::getMaxRecursionDepth() - 1);
};
// Check if we can combine facts from the signed and unsigned systems to
// derive additional facts.
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 320b79203c0b3..26a2ba7b58108 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -631,8 +631,27 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
ConstantOffset = CI->getValue();
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
// Trace into subexpressions for more hoisting opportunities.
- if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
- ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+ BinaryOperator *TraceInto = BO;
+ if (BO->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(BO->getOperand(1))) {
+ KnownBits LHSKnown(BO->getOperand(0)->getType()->getScalarSizeInBits());
+ KnownBits RHSKnown(BO->getOperand(1)->getType()->getScalarSizeInBits());
+ computeKnownBitsExhaustive(BO->getOperand(0), LHSKnown, DL);
+ computeKnownBitsExhaustive(BO->getOperand(1), RHSKnown, DL);
+ if (KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown)) {
+ IRBuilder<> Builder(BO);
+ TraceInto = cast<BinaryOperator>(
+ Builder.CreateOr(BO->getOperand(0), BO->getOperand(1)));
+ cast<PossiblyDisjointInst>(TraceInto)->setIsDisjoint(true);
+ BO->replaceAllUsesWith(TraceInto);
+ BO->eraseFromParent();
+ }
+ }
+
+ if (CanTraceInto(SignExtended, ZeroExtended, TraceInto, NonNegative)) {
+ ConstantOffset =
+ findInEitherOperand(TraceInto, SignExtended, ZeroExtended);
+ }
} else if (isa<TruncInst>(V)) {
ConstantOffset =
find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll
new file mode 100644
index 0000000000000..3b0d952396425
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --passes=separate-const-offset-from-gep < %s | FileCheck %s
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+define amdgpu_kernel void @matmul_kernel(ptr addrspace(1) %inptr, <4 x i32> %data0, <4 x i32> %data1, i1 %cond) {
+entry:
+ %28 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %29 = and i32 %28, 8
+ %.not = icmp eq i32 %29, 0
+ %30 = and i32 %28, 16
+ %31 = icmp eq i32 %30, 0
+ %32 = and i32 %28, 32
+ %33 = icmp eq i32 %32, 0
+ %34 = and i32 %28, 256
+ %53 = shl i32 %28, 3
+ %54 = and i32 %53, 56
+ %121 = select i1 %.not, i32 0, i32 72
+ %122 = select i1 %31, i32 0, i32 144
+ %123 = or disjoint i32 %121, %122
+ %124 = select i1 %33, i32 0, i32 288
+ %125 = or disjoint i32 %123, %124
+ %126 = xor i32 %125, %54
+ %127 = and i32 %53, 1536
+ %128 = or disjoint i32 %127, %126
+ %129 = shl nuw nsw i32 %34, 3
+ %130 = or disjoint i32 %128, %129
+ %132 = xor i32 %130, 4096
+ %133 = getelementptr inbounds nuw half, ptr addrspace(3) @global_smem, i32 %132
+ br i1 %cond, label %s1, label %s2
+
+s1:
+ store <4 x i32> %data0, ptr addrspace(3) %133, align 16
+ br label %end
+
+s2:
+ br label %end
+
+end:
+ ret void
+}
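Note on the GEP test: the masks alone guarantee that bit 12 of %130 is clear
(%130 is assembled from bits 3..11 of the shifted workitem id), but proving
that requires chasing a chain deeper than the default cap of 6 allows. With
computeKnownBitsExhaustive(), SeparateConstOffsetFromGEP can rewrite
`%132 = xor i32 %130, 4096` to `or disjoint i32 %130, 4096`, which it already
knows how to trace into, so the constant 4096 can be split from the
getelementptr and folded into the addressing computation.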