[llvm] [ValueTracking] Make the MaxAnalysisRecursionDepth overridable (PR #137721)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 4 14:56:54 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/137721
From 0590e5e859458225b1471a9f8699068dfa87afbb Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Mon, 28 Apr 2025 14:37:25 -0700
Subject: [PATCH 1/2] [ValueTracking] Make the MaxAnalysisRecursionDepth
overridable
Change-Id: Id715df21631213e3ac77bf6d24a41375dab194b9
---
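Note for reviewers: getAnalysisRecursionDepthLimit() deliberately returns -1
from a function with an unsigned return type; when the hidden flag is set the
value wraps to UINT_MAX, so every Depth-based bail-out in ValueTracking is
effectively disabled. A minimal, self-contained sketch of that contract (the
cl::opt is replaced by a plain bool here purely for illustration):

    #include <cassert>
    #include <climits>

    static bool ExhaustiveRecursion = false; // stand-in for the hidden cl::opt
    static constexpr unsigned MaxAnalysisRecursionDepth = 6;

    static unsigned getAnalysisRecursionDepthLimit() {
      // unsigned(-1) wraps to UINT_MAX when the override is requested.
      return ExhaustiveRecursion ? unsigned(-1) : MaxAnalysisRecursionDepth;
    }

    int main() {
      assert(getAnalysisRecursionDepthLimit() == 6u);       // default cap
      ExhaustiveRecursion = true; // i.e. -exhaustive-analysis-recursion=true
      assert(getAnalysisRecursionDepthLimit() == UINT_MAX); // unbounded
      return 0;
    }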
llvm/include/llvm/Analysis/ValueTracking.h | 2 +
llvm/lib/Analysis/ValueTracking.cpp | 69 +++++++++-------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
.../InstCombine/InstCombineCompares.cpp | 2 +-
.../InstCombine/InstCombineMulDivRem.cpp | 2 +-
.../InstCombine/InstCombineSelect.cpp | 6 +-
.../InstCombineSimplifyDemanded.cpp | 19 +++--
.../InstCombine/InstructionCombining.cpp | 9 ++-
.../Scalar/ConstraintElimination.cpp | 9 ++-
.../InstCombine/simplifydemanded-depth.ll | 80 +++++++++++++++++++
10 files changed, 147 insertions(+), 53 deletions(-)
create mode 100644 llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index b05b8f349b8d5..412ba32072e13 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -45,6 +45,8 @@ template <typename T> class ArrayRef;
constexpr unsigned MaxAnalysisRecursionDepth = 6;
+unsigned getAnalysisRecursionDepthLimit();
+
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
///
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 0a460786d00ea..af4b69d0b5619 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -90,6 +90,8 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+static cl::opt<bool> ExhaustiveRecursion("exhaustive-analysis-recursion",
+ cl::Hidden);
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
@@ -131,6 +133,12 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
DemandedElts, DemandedLHS, DemandedRHS);
}
+unsigned llvm::getAnalysisRecursionDepthLimit() {
+ if (!ExhaustiveRecursion.getNumOccurrences() || !ExhaustiveRecursion)
+ return MaxAnalysisRecursionDepth;
+ return -1;
+}
+
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
KnownBits &Known, const SimplifyQuery &Q,
unsigned Depth);
@@ -798,7 +806,7 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
KnownBits &Known, const SimplifyQuery &SQ,
bool Invert, unsigned Depth) {
Value *A, *B;
- if (Depth < MaxAnalysisRecursionDepth &&
+ if (Depth < getAnalysisRecursionDepthLimit() &&
match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
KnownBits Known2(Known.getBitWidth());
KnownBits Known3(Known.getBitWidth());
@@ -833,7 +841,8 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
return;
}
- if (Depth < MaxAnalysisRecursionDepth && match(Cond, m_Not(m_Value(A))))
+ if (Depth < getAnalysisRecursionDepthLimit() &&
+ match(Cond, m_Not(m_Value(A))))
computeKnownBitsFromCond(V, A, Known, SQ, !Invert, Depth + 1);
}
@@ -927,7 +936,7 @@ void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -1696,7 +1705,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < MaxAnalysisRecursionDepth - 1 && Known.isUnknown()) {
+ if (Depth < getAnalysisRecursionDepthLimit() - 1 && Known.isUnknown()) {
// Skip if every incoming value references to ourself.
if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
@@ -1725,7 +1734,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// TODO: See if we can base recursion limiter on number of incoming phi
// edges so we don't overly clamp analysis.
computeKnownBits(IncValue, DemandedElts, Known2, RecQ,
- MaxAnalysisRecursionDepth - 1);
+ getAnalysisRecursionDepthLimit() - 1);
// See if we can further use a conditional branch into the phi
// to help us determine the range of the value.
@@ -2194,7 +2203,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
}
assert(V && "No Value?");
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
#ifndef NDEBUG
Type *Ty = V->getType();
@@ -2293,7 +2302,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known = Range->toKnownBits();
// All recursive calls that increase depth must come after this.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return;
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
@@ -2406,7 +2415,7 @@ static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
/// types and vectors of integers.
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
const SimplifyQuery &Q, unsigned Depth) {
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (isa<Constant>(V))
return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
@@ -2468,7 +2477,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxAnalysisRecursionDepth)
+ if (Depth++ == getAnalysisRecursionDepthLimit())
return false;
switch (I->getOpcode()) {
@@ -2556,7 +2565,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
// Recursively check all incoming values. Limit recursion to 2 levels, so
// that search complexity is limited to number of operands^2.
- unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+ unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
// Value is power of 2 if it is coming from PHI node itself by induction.
if (U.get() == PN)
@@ -2660,7 +2669,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, const SimplifyQuery &Q,
// to recurse 10k times just because we have 10k GEP operands. We don't
// bail completely out because we want to handle constant GEPs regardless
// of depth.
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
continue;
if (isKnownNonZero(GTI.getOperand(), Q, Depth))
@@ -3164,7 +3173,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// Check if all incoming values are non-zero using recursion.
SimplifyQuery RecQ = Q.getWithoutCondContext();
- unsigned NewDepth = std::max(Depth, MaxAnalysisRecursionDepth - 1);
+ unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
return true;
@@ -3430,7 +3439,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -3493,7 +3502,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
return true;
// Some of the tests below are recursive, so bail out if we hit the limit.
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
return false;
// Check for pointer simplifications.
@@ -3877,7 +3886,7 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2,
// We can't look through casts yet.
return false;
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return false;
// See if we can recurse through (exactly one of) our operands. This
@@ -3994,7 +4003,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
const SimplifyQuery &Q, unsigned Depth) {
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -4021,7 +4030,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return 1;
if (auto *U = dyn_cast<Operator>(V)) {
@@ -4555,7 +4564,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
KnownFPClass &KnownFromContext,
unsigned Depth = 0) {
Value *A, *B;
- if (Depth < MaxAnalysisRecursionDepth &&
+ if (Depth < getAnalysisRecursionDepthLimit() &&
(CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
: match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
computeKnownFPClassFromCond(V, A, CondIsTrue, CxtI, KnownFromContext,
@@ -4564,7 +4573,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
Depth + 1);
return;
}
- if (Depth < MaxAnalysisRecursionDepth && match(Cond, m_Not(m_Value(A)))) {
+ if (Depth < getAnalysisRecursionDepthLimit() && match(Cond, m_Not(m_Value(A)))) {
computeKnownFPClassFromCond(V, A, !CondIsTrue, CxtI, KnownFromContext,
Depth + 1);
return;
@@ -4696,7 +4705,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
if (auto *CFP = dyn_cast<ConstantFP>(V)) {
Known.KnownFPClasses = CFP->getValueAPF().classify();
@@ -4790,7 +4799,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
// All recursive calls that increase depth must come after this.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return;
const unsigned Opc = Op->getOpcode();
@@ -5744,7 +5753,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- const unsigned PhiRecursionLimit = MaxAnalysisRecursionDepth - 2;
+ const unsigned PhiRecursionLimit = getAnalysisRecursionDepthLimit() - 2;
if (Depth < PhiRecursionLimit) {
// Skip if every incoming value references to ourself.
@@ -7559,7 +7568,7 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
static bool isGuaranteedNotToBeUndefOrPoison(
const Value *V, AssumptionCache *AC, const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return false;
if (isa<MetadataAsValue>(V))
@@ -8895,7 +8904,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp,
unsigned Depth) {
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return {SPF_UNKNOWN, SPNB_NA, false};
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -9314,10 +9323,10 @@ isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
// C1` (see discussion: D58633).
ConstantRange LCR = computeConstantRange(
L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
ConstantRange RCR = computeConstantRange(
R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, MaxAnalysisRecursionDepth - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
// Even if L1/R1 are not both constant, we can still sometimes deduce
// relationship from a single constant. For example X u> Y implies X != 0.
if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
@@ -9382,7 +9391,7 @@ isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
LHS->getOpcode() == Instruction::Select) &&
"Expected LHS to be 'and', 'or', or 'select'.");
- assert(Depth <= MaxAnalysisRecursionDepth && "Hit recursion limit");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Hit recursion limit");
// If the result of an 'or' is false, then we know both legs of the 'or' are
// false. Similarly, if the result of an 'and' is true, then we know both
@@ -9407,7 +9416,7 @@ llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
const Value *RHSOp0, const Value *RHSOp1,
const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
// Bail out when we hit the limit.
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return std::nullopt;
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for
@@ -9478,7 +9487,7 @@ std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
return std::nullopt;
}
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return std::nullopt;
// LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
@@ -9940,7 +9949,7 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
if (auto *C = dyn_cast<Constant>(V))
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 64af7a57e8d12..b9a1b72001ded 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1918,7 +1918,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
const MachineRegisterInfo &MRI,
unsigned Depth,
UndefPoisonKind Kind) {
- if (Depth >= MaxAnalysisRecursionDepth)
+ if (Depth >= getAnalysisRecursionDepthLimit())
return false;
MachineInstr *RegDef = MRI.getVRegDef(Reg);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c112fae351817..ffda554c84e83 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4390,7 +4390,7 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
return true;
if (V->getType()->getScalarSizeInBits() == 1)
return true;
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
return false;
Value *X;
const Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 457199a72510e..6fe80e01eec7d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1533,7 +1533,7 @@ Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
});
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == MaxAnalysisRecursionDepth)
+ if (Depth++ == getAnalysisRecursionDepthLimit())
return nullptr;
// log2(zext X) -> zext log2(X)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d7d0431a5b8d0..0bf05f975ffaa 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3641,7 +3641,7 @@ static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
/// select condition.
static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
unsigned Depth) {
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return false;
// Ignore the case where the select arm itself is affected. These cases
@@ -3651,9 +3651,9 @@ static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
if (auto *I = dyn_cast<Instruction>(V)) {
if (isa<PHINode>(I)) {
- if (Depth == MaxAnalysisRecursionDepth - 1)
+ if (Depth == getAnalysisRecursionDepthLimit() - 1)
return false;
- Depth = MaxAnalysisRecursionDepth - 2;
+ Depth = getAnalysisRecursionDepthLimit() - 2;
}
return any_of(I->operands(), [&](Value *Op) {
return Op->getType()->isIntOrIntVectorTy() &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 0e3436d12702d..595fa2c5c7e26 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -117,7 +117,7 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
return false;
}
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return false;
Value *NewVal;
@@ -167,7 +167,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
const SimplifyQuery &Q,
unsigned Depth) {
assert(I != nullptr && "Null pointer of Value???");
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = I->getType();
assert(
@@ -1451,7 +1451,8 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
}
// Limit search depth.
- if (Depth == SimplifyDemandedVectorEltsDepthLimit)
+ if (Depth == SimplifyDemandedVectorEltsDepthLimit &&
+ Depth >= getAnalysisRecursionDepthLimit())
return nullptr;
if (!AllowMultipleUsers) {
@@ -1965,12 +1966,10 @@ static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
}
}
-Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
- FPClassTest DemandedMask,
- KnownFPClass &Known,
- Instruction *CxtI,
- unsigned Depth) {
- assert(Depth <= MaxAnalysisRecursionDepth && "Limit Search Depth");
+Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
+ Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
+ Instruction *CxtI, unsigned Depth) {
+ assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
Type *VTy = V->getType();
assert(Known == KnownFPClass() && "expected uninitialized state");
@@ -1978,7 +1977,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
if (DemandedMask == fcNone)
return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);
- if (Depth == MaxAnalysisRecursionDepth)
+ if (Depth == getAnalysisRecursionDepthLimit())
return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 439a86d951a83..1a245631438dc 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2594,7 +2594,7 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
if (match(V, m_ImmConstant(C)))
return ConstantExpr::getNot(C);
- if (Depth++ >= MaxAnalysisRecursionDepth)
+ if (Depth++ >= getAnalysisRecursionDepthLimit())
return nullptr;
// The rest of the cases require that we invert all uses so don't bother
@@ -2686,9 +2686,10 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
SmallVector<std::pair<Value *, BasicBlock *>, 8> IncomingValues;
for (Use &U : PN->operands()) {
BasicBlock *IncomingBlock = PN->getIncomingBlock(U);
- Value *NewIncomingVal = getFreelyInvertedImpl(
- U.get(), /*WillInvertAllUses=*/false,
- /*Builder=*/nullptr, LocalDoesConsume, MaxAnalysisRecursionDepth - 1);
+ Value *NewIncomingVal =
+ getFreelyInvertedImpl(U.get(), /*WillInvertAllUses=*/false,
+ /*Builder=*/nullptr, LocalDoesConsume,
+ getAnalysisRecursionDepthLimit() - 1);
if (NewIncomingVal == nullptr)
return nullptr;
// Make sure that we can safely erase the original PHI node.
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index cbad5dd357687..b57b82a97d743 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -822,8 +822,10 @@ ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
// unsigned ones. This increases the reasoning effectiveness in combination
// with the signed <-> unsigned transfer logic.
if (CmpInst::isSigned(Pred) &&
- isKnownNonNegative(Op0, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1) &&
- isKnownNonNegative(Op1, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1))
+ isKnownNonNegative(Op0, DL,
+ /*Depth=*/getAnalysisRecursionDepthLimit() - 1) &&
+ isKnownNonNegative(Op1, DL,
+ /*Depth=*/getAnalysisRecursionDepthLimit() - 1))
Pred = ICmpInst::getUnsignedPredicate(Pred);
SmallVector<Value *> NewVariables;
@@ -896,7 +898,8 @@ void ConstraintInfo::transferToOtherSystem(
unsigned NumOut, SmallVectorImpl<StackEntry> &DFSInStack) {
auto IsKnownNonNegative = [this](Value *V) {
return doesHold(CmpInst::ICMP_SGE, V, ConstantInt::get(V->getType(), 0)) ||
- isKnownNonNegative(V, DL, /*Depth=*/MaxAnalysisRecursionDepth - 1);
+ isKnownNonNegative(V, DL,
+ /*Depth=*/getAnalysisRecursionDepthLimit() - 1);
};
// Check if we can combine facts from the signed and unsigned systems to
// derive additional facts.
diff --git a/llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll b/llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll
new file mode 100644
index 0000000000000..228877879a31b
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/simplifydemanded-depth.ll
@@ -0,0 +1,80 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S < %s -passes=instcombine | FileCheck -check-prefixes=DEFAULT %s
+; RUN: opt -S < %s -passes=instcombine -exhaustive-analysis-recursion=true | FileCheck -check-prefixes=EXHAUSTIVE %s
+
+declare i32 @callee()
+
+define i32 @test5(i1 %C) {
+; DEFAULT-LABEL: @test5(
+; DEFAULT-NEXT: [[VALUE:%.*]] = tail call i32 @callee(), !range [[RNG0:![0-9]+]]
+; DEFAULT-NEXT: [[VALUE_MASK0:%.*]] = and i32 [[VALUE]], 8
+; DEFAULT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[VALUE_MASK0]], 0
+; DEFAULT-NEXT: [[VALUE_MASK1:%.*]] = and i32 [[VALUE]], 16
+; DEFAULT-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VALUE_MASK1]], 0
+; DEFAULT-NEXT: [[VALUE_MASK2:%.*]] = and i32 [[VALUE]], 32
+; DEFAULT-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VALUE_MASK2]], 0
+; DEFAULT-NEXT: [[VALUE_SHIFT0:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; DEFAULT-NEXT: [[VALUE_MASK4:%.*]] = and i32 [[VALUE_SHIFT0]], 56
+; DEFAULT-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; DEFAULT-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; DEFAULT-NEXT: [[COMBINED_01:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; DEFAULT-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; DEFAULT-NEXT: [[COMBINED_012:%.*]] = or disjoint i32 [[COMBINED_01]], [[SEL2]]
+; DEFAULT-NEXT: [[COMBINED_0123:%.*]] = xor i32 [[COMBINED_012]], [[VALUE_MASK4]]
+; DEFAULT-NEXT: [[VALUE_SHIFT1:%.*]] = and i32 [[VALUE_SHIFT0]], 1536
+; DEFAULT-NEXT: [[OP0:%.*]] = or disjoint i32 [[VALUE_SHIFT1]], [[COMBINED_0123]]
+; DEFAULT-NEXT: [[VALUE_MASK3:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; DEFAULT-NEXT: [[OP1:%.*]] = and i32 [[VALUE_MASK3]], 2048
+; DEFAULT-NEXT: [[BASE:%.*]] = or disjoint i32 [[OP0]], [[OP1]]
+; DEFAULT-NEXT: [[XOR:%.*]] = xor i32 [[BASE]], 4096
+; DEFAULT-NEXT: ret i32 [[XOR]]
+;
+; EXHAUSTIVE-LABEL: @test5(
+; EXHAUSTIVE-NEXT: [[VALUE:%.*]] = tail call i32 @callee(), !range [[RNG0:![0-9]+]]
+; EXHAUSTIVE-NEXT: [[VALUE_MASK0:%.*]] = and i32 [[VALUE]], 8
+; EXHAUSTIVE-NEXT: [[CMP0:%.*]] = icmp eq i32 [[VALUE_MASK0]], 0
+; EXHAUSTIVE-NEXT: [[VALUE_MASK1:%.*]] = and i32 [[VALUE]], 16
+; EXHAUSTIVE-NEXT: [[CMP1:%.*]] = icmp eq i32 [[VALUE_MASK1]], 0
+; EXHAUSTIVE-NEXT: [[VALUE_MASK2:%.*]] = and i32 [[VALUE]], 32
+; EXHAUSTIVE-NEXT: [[CMP2:%.*]] = icmp eq i32 [[VALUE_MASK2]], 0
+; EXHAUSTIVE-NEXT: [[VALUE_SHIFT0:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; EXHAUSTIVE-NEXT: [[VALUE_MASK4:%.*]] = and i32 [[VALUE_SHIFT0]], 56
+; EXHAUSTIVE-NEXT: [[SEL0:%.*]] = select i1 [[CMP0]], i32 0, i32 72
+; EXHAUSTIVE-NEXT: [[SEL1:%.*]] = select i1 [[CMP1]], i32 0, i32 144
+; EXHAUSTIVE-NEXT: [[COMBINED_01:%.*]] = or disjoint i32 [[SEL0]], [[SEL1]]
+; EXHAUSTIVE-NEXT: [[SEL2:%.*]] = select i1 [[CMP2]], i32 0, i32 288
+; EXHAUSTIVE-NEXT: [[COMBINED_012:%.*]] = or disjoint i32 [[COMBINED_01]], [[SEL2]]
+; EXHAUSTIVE-NEXT: [[COMBINED_0123:%.*]] = xor i32 [[COMBINED_012]], [[VALUE_MASK4]]
+; EXHAUSTIVE-NEXT: [[VALUE_SHIFT1:%.*]] = and i32 [[VALUE_SHIFT0]], 1536
+; EXHAUSTIVE-NEXT: [[OP0:%.*]] = or disjoint i32 [[VALUE_SHIFT1]], [[COMBINED_0123]]
+; EXHAUSTIVE-NEXT: [[VALUE_MASK3:%.*]] = shl nuw nsw i32 [[VALUE]], 3
+; EXHAUSTIVE-NEXT: [[OP1:%.*]] = and i32 [[VALUE_MASK3]], 2048
+; EXHAUSTIVE-NEXT: [[BASE:%.*]] = or disjoint i32 [[OP0]], [[OP1]]
+; EXHAUSTIVE-NEXT: [[XOR:%.*]] = or disjoint i32 [[BASE]], 4096
+; EXHAUSTIVE-NEXT: ret i32 [[XOR]]
+;
+ %value = tail call i32 @callee(), !range !0
+ %value_mask0 = and i32 %value, 8
+ %cmp0 = icmp eq i32 %value_mask0, 0
+ %value_mask1 = and i32 %value, 16
+ %cmp1 = icmp eq i32 %value_mask1, 0
+ %value_mask2 = and i32 %value, 32
+ %cmp2 = icmp eq i32 %value_mask2, 0
+ %value_mask3 = and i32 %value, 256
+ %value_shift0 = shl i32 %value, 3
+ %value_mask4 = and i32 %value_shift0, 56
+ %sel0 = select i1 %cmp0, i32 0, i32 72 ; lane_bit3 * 73
+ %sel1 = select i1 %cmp1, i32 0, i32 144 ; lane_bit4 * 144
+ %combined_01 = or disjoint i32 %sel0, %sel1 ;
+ %sel2 = select i1 %cmp2, i32 0, i32 288 ; lane_bit5 * 288
+ %combined_012 = or disjoint i32 %combined_01, %sel2
+ %combined_0123 = xor i32 %combined_012, %value_mask4
+ %value_shift1 = and i32 %value_shift0, 1536
+ %op0 = or disjoint i32 %value_shift1, %combined_0123
+ %op1 = shl nuw nsw i32 %value_mask3, 3
+ %base = or disjoint i32 %op0, %op1
+ %xor = xor i32 %base, 4096
+ ret i32 %xor
+}
+
+!0 = !{ i32 0, i32 2048 }
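Note on the test: the !range metadata bounds %value to [0, 2048), so its bits
11 and up are known zero, and %base is assembled only from bits 3..11; bit 12
of %base is therefore provably clear, which makes `xor i32 %base, 4096`
equivalent to `or disjoint i32 %base, 4096` (the EXHAUSTIVE check line above).
The default cap of 6 makes the known-bits walk give up before it reaches the
!range fact. A self-contained check of the disjointness argument, using the
same KnownBits helper the second patch calls:

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    #include <cassert>
    using namespace llvm;

    int main() {
      // %base: only bit 12 has to be known zero for the rewrite.
      KnownBits Base(32);
      Base.Zero.setBit(12);
      // The xor constant: 4096 == 1 << 12.
      KnownBits C = KnownBits::makeConstant(APInt(32, 4096));
      // No bit can be set in both operands, so for these two values
      // xor, or-disjoint and add all compute the same result.
      assert(KnownBits::haveNoCommonBitsSet(Base, C));
      return 0;
    }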
From 489fc1ccde12963855b20e1c0f5fefbaf232fd98 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Wed, 28 May 2025 17:07:04 -0700
Subject: [PATCH 2/2] computeKnownBitsExhaustive + usecase + infinite recursion
tracking
Change-Id: Icf0dac8c87812aa8edff53a1d5cdd664ab6d6d12
---
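Note for reviewers: the DepthLimit singleton lifts the depth cap only on the
first visit of a given (query kind, value) pair; a revisit returns the
caller's current Depth, so the caller's `Depth >= limit` (or `Depth == limit`)
test fires immediately and cyclic value graphs terminate instead of recursing
forever. A standalone mock of that rule (the names here are illustrative, not
the patch's API):

    #include <cassert>
    #include <set>
    #include <utility>

    struct MockDepthLimit {
      bool Override = false; // toggled around computeKnownBitsExhaustive()
      std::set<std::pair<int, const void *>> Visited; // (query kind, value)

      unsigned maxDepth(int Kind, const void *V, unsigned Depth) {
        if (!Override)
          return 6; // MaxAnalysisRecursionDepth
        if (Visited.emplace(Kind, V).second)
          return unsigned(-1); // first visit: unbounded budget
        return Depth;          // revisit: forces the Depth >= limit bail-out
      }
    };

    int main() {
      MockDepthLimit DL;
      int V = 0;
      DL.Override = true;
      assert(DL.maxDepth(0, &V, 9) == unsigned(-1)); // first visit
      assert(DL.maxDepth(0, &V, 9) == 9);            // revisit: walk stops
      return 0;
    }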
llvm/include/llvm/Analysis/ValueTracking.h | 66 ++++++++++++-
llvm/lib/Analysis/ValueTracking.cpp | 93 +++++++++++--------
llvm/lib/CodeGen/GlobalISel/Utils.cpp | 2 +-
.../InstCombine/InstCombineAndOrXor.cpp | 8 +-
.../InstCombine/InstCombineCompares.cpp | 2 +-
.../InstCombine/InstCombineMulDivRem.cpp | 2 +-
.../InstCombine/InstCombineSelect.cpp | 6 +-
.../InstCombineSimplifyDemanded.cpp | 18 ++--
.../InstCombine/InstructionCombining.cpp | 4 +-
.../Scalar/ConstraintElimination.cpp | 6 +-
.../Scalar/SeparateConstOffsetFromGEP.cpp | 23 ++++-
.../AMDGPU/xor-or-disjoint.ll | 41 ++++++++
12 files changed, 206 insertions(+), 65 deletions(-)
create mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 412ba32072e13..e2c4c5112224a 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -45,7 +45,64 @@ template <typename T> class ArrayRef;
constexpr unsigned MaxAnalysisRecursionDepth = 6;
-unsigned getAnalysisRecursionDepthLimit();
+class DepthLimit {
+public:
+ static DepthLimit &get() {
+ static DepthLimit Instance;
+ return Instance;
+ }
+
+ enum class VTCycle {
+ KNOWNBIT = 0,
+ KNOWNBITCOND = 1,
+ NONZERO = 2,
+ NONEQUAL = 3,
+ IMPLIED = 4,
+ FPCLASS = 5,
+ RANGE = 6,
+ SIGNBITS = 7,
+ NOTUNDEFPOISON = 8,
+ NONE = 9
+ };
+
+ static unsigned getMaxRecursionDepth(VTCycle Cycle, const Value *I,
+ unsigned Depth) {
+ if (!get().RecursionDepthOverride || Cycle == VTCycle::NONE)
+ return get().getMaxRecursionDepthImpl();
+
+ if (get().Encountered[Cycle].insert(I).second)
+ return get().getMaxRecursionDepthImpl();
+
+ return Depth;
+ }
+ static unsigned getMaxRecursionDepth() {
+ return get().getMaxRecursionDepthImpl();
+ }
+ static void setOverrideDepthLimit() { get().setOverrideDepthLimitImpl(); }
+ static void resetOverrideDepthLimit() { get().resetOverrideDepthLimitImpl(); }
+
+ DepthLimit(const DepthLimit &) = delete;
+ DepthLimit &operator=(const DepthLimit &) = delete;
+
+private:
+ DepthLimit() {}
+
+  const unsigned MaxAnalysisRecursionDepth = 6;
+ bool RecursionDepthOverride = false;
+
+ DenseMap<VTCycle, SmallPtrSet<const Value *, 8>> Encountered;
+
+ unsigned getMaxRecursionDepthImpl() {
+    return RecursionDepthOverride ? -1 : MaxAnalysisRecursionDepth;
+ }
+
+ void setOverrideDepthLimitImpl() { RecursionDepthOverride = true; }
+
+ void resetOverrideDepthLimitImpl() {
+ RecursionDepthOverride = false;
+ Encountered.clear();
+ }
+};
/// Determine which bits of V are known to be either zero or one and return
/// them in the KnownZero/KnownOne bit sets.
@@ -88,6 +145,13 @@ LLVM_ABI KnownBits computeKnownBits(const Value *V, const SimplifyQuery &Q,
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known,
const SimplifyQuery &Q, unsigned Depth = 0);
+void computeKnownBitsExhaustive(const Value *V, KnownBits &Known,
+ const DataLayout &DL,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr,
+ bool UseInstrInfo = true);
+
/// Compute known bits from the range metadata.
/// \p KnownZero the set of bits that are known to be zero
/// \p KnownOne the set of bits that are known to be one
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index af4b69d0b5619..bf8ca3766eecb 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -90,9 +90,6 @@ using namespace llvm::PatternMatch;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
-static cl::opt<bool> ExhaustiveRecursion("exhaustive-analysis-recursion",
- cl::Hidden);
-
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
/// returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -133,12 +130,6 @@ static bool getShuffleDemandedElts(const ShuffleVectorInst *Shuf,
DemandedElts, DemandedLHS, DemandedRHS);
}
-unsigned llvm::getAnalysisRecursionDepthLimit() {
- if (!ExhaustiveRecursion.getNumOccurrences() || !ExhaustiveRecursion)
- return MaxAnalysisRecursionDepth;
- return -1;
-}
-
static void computeKnownBits(const Value *V, const APInt &DemandedElts,
KnownBits &Known, const SimplifyQuery &Q,
unsigned Depth);
@@ -163,6 +154,16 @@ void llvm::computeKnownBits(const Value *V, KnownBits &Known,
Depth);
}
+void llvm::computeKnownBitsExhaustive(const Value *V, KnownBits &Known,
+ const DataLayout &DL, AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT,
+ bool UseInstrInfo) {
+ DepthLimit::setOverrideDepthLimit();
+ computeKnownBits(V, Known, DL, AC, CxtI, DT, UseInstrInfo, /*Depth=*/0);
+ DepthLimit::resetOverrideDepthLimit();
+}
+
KnownBits llvm::computeKnownBits(const Value *V, const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo,
@@ -806,7 +807,8 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
KnownBits &Known, const SimplifyQuery &SQ,
bool Invert, unsigned Depth) {
Value *A, *B;
- if (Depth < getAnalysisRecursionDepthLimit() &&
+ if (Depth < DepthLimit::getMaxRecursionDepth(
+ DepthLimit::VTCycle::KNOWNBITCOND, V, Depth) &&
match(Cond, m_LogicalOp(m_Value(A), m_Value(B)))) {
KnownBits Known2(Known.getBitWidth());
KnownBits Known3(Known.getBitWidth());
@@ -841,7 +843,7 @@ static void computeKnownBitsFromCond(const Value *V, Value *Cond,
return;
}
- if (Depth < getAnalysisRecursionDepthLimit() &&
+ if (Depth < DepthLimit::getMaxRecursionDepth() &&
match(Cond, m_Not(m_Value(A))))
computeKnownBitsFromCond(V, A, Known, SQ, !Invert, Depth + 1);
}
@@ -936,7 +938,7 @@ void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known,
}
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
continue;
ICmpInst *Cmp = dyn_cast<ICmpInst>(Arg);
@@ -1705,7 +1707,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- if (Depth < getAnalysisRecursionDepthLimit() - 1 && Known.isUnknown()) {
+ if (Depth < DepthLimit::getMaxRecursionDepth() - 1 && Known.isUnknown()) {
// Skip if every incoming value references to ourself.
if (isa_and_nonnull<UndefValue>(P->hasConstantValue()))
break;
@@ -1734,7 +1736,7 @@ static void computeKnownBitsFromOperator(const Operator *I,
// TODO: See if we can base recursion limiter on number of incoming phi
// edges so we don't overly clamp analysis.
computeKnownBits(IncValue, DemandedElts, Known2, RecQ,
- getAnalysisRecursionDepthLimit() - 1);
+ DepthLimit::getMaxRecursionDepth() - 1);
// See if we can further use a conditional branch into the phi
// to help us determine the range of the value.
@@ -2203,7 +2205,7 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
}
assert(V && "No Value?");
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
#ifndef NDEBUG
Type *Ty = V->getType();
@@ -2302,7 +2304,8 @@ void computeKnownBits(const Value *V, const APInt &DemandedElts,
Known = Range->toKnownBits();
// All recursive calls that increase depth must come after this.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::KNOWNBIT, V, Depth))
return;
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
@@ -2415,7 +2418,7 @@ static bool isImpliedToBeAPowerOfTwoFromCond(const Value *V, bool OrZero,
/// types and vectors of integers.
bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
const SimplifyQuery &Q, unsigned Depth) {
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (isa<Constant>(V))
return OrZero ? match(V, m_Power2OrZero()) : match(V, m_Power2());
@@ -2477,7 +2480,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
return true;
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == getAnalysisRecursionDepthLimit())
+ if (Depth++ == DepthLimit::getMaxRecursionDepth())
return false;
switch (I->getOpcode()) {
@@ -2565,7 +2568,7 @@ bool llvm::isKnownToBeAPowerOfTwo(const Value *V, bool OrZero,
// Recursively check all incoming values. Limit recursion to 2 levels, so
// that search complexity is limited to number of operands^2.
- unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
+ unsigned NewDepth = std::max(Depth, DepthLimit::getMaxRecursionDepth() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
// Value is power of 2 if it is coming from PHI node itself by induction.
if (U.get() == PN)
@@ -2669,7 +2672,7 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, const SimplifyQuery &Q,
// to recurse 10k times just because we have 10k GEP operands. We don't
// bail completely out because we want to handle constant GEPs regardless
// of depth.
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth++ >= DepthLimit::getMaxRecursionDepth())
continue;
if (isKnownNonZero(GTI.getOperand(), Q, Depth))
@@ -3173,7 +3176,7 @@ static bool isKnownNonZeroFromOperator(const Operator *I,
// Check if all incoming values are non-zero using recursion.
SimplifyQuery RecQ = Q.getWithoutCondContext();
- unsigned NewDepth = std::max(Depth, getAnalysisRecursionDepthLimit() - 1);
+ unsigned NewDepth = std::max(Depth, DepthLimit::getMaxRecursionDepth() - 1);
return llvm::all_of(PN->operands(), [&](const Use &U) {
if (U.get() == PN)
return true;
@@ -3439,7 +3442,7 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -3502,9 +3505,11 @@ bool isKnownNonZero(const Value *V, const APInt &DemandedElts,
return true;
// Some of the tests below are recursive, so bail out if we hit the limit.
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth >=
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::NONZERO, V, Depth))
return false;
+ ++Depth;
// Check for pointer simplifications.
if (PointerType *PtrTy = dyn_cast<PointerType>(Ty)) {
@@ -3886,7 +3891,8 @@ static bool isKnownNonEqual(const Value *V1, const Value *V2,
// We can't look through casts yet.
return false;
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::NONEQUAL,
+ V1, Depth))
return false;
// See if we can recurse through (exactly one of) our operands. This
@@ -4003,7 +4009,7 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
const SimplifyQuery &Q, unsigned Depth) {
Type *Ty = V->getType();
#ifndef NDEBUG
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (auto *FVTy = dyn_cast<FixedVectorType>(Ty)) {
assert(
@@ -4030,7 +4036,8 @@ static unsigned ComputeNumSignBitsImpl(const Value *V,
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::SIGNBITS, V, Depth))
return 1;
if (auto *U = dyn_cast<Operator>(V)) {
@@ -4564,7 +4571,7 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
KnownFPClass &KnownFromContext,
unsigned Depth = 0) {
Value *A, *B;
- if (Depth < getAnalysisRecursionDepthLimit() &&
+ if (Depth < DepthLimit::getMaxRecursionDepth() &&
(CondIsTrue ? match(Cond, m_LogicalAnd(m_Value(A), m_Value(B)))
: match(Cond, m_LogicalOr(m_Value(A), m_Value(B))))) {
computeKnownFPClassFromCond(V, A, CondIsTrue, CxtI, KnownFromContext,
@@ -4573,7 +4580,8 @@ static void computeKnownFPClassFromCond(const Value *V, Value *Cond,
Depth + 1);
return;
}
- if (Depth < getAnalysisRecursionDepthLimit() && match(Cond, m_Not(m_Value(A)))) {
+ if (Depth < DepthLimit::getMaxRecursionDepth() &&
+ match(Cond, m_Not(m_Value(A)))) {
computeKnownFPClassFromCond(V, A, !CondIsTrue, CxtI, KnownFromContext,
Depth + 1);
return;
@@ -4705,7 +4713,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
}
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
if (auto *CFP = dyn_cast<ConstantFP>(V)) {
Known.KnownFPClasses = CFP->getValueAPF().classify();
@@ -4799,7 +4807,8 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
return;
// All recursive calls that increase depth must come after this.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::FPCLASS, Op, Depth))
return;
const unsigned Opc = Op->getOpcode();
@@ -5753,7 +5762,7 @@ void computeKnownFPClass(const Value *V, const APInt &DemandedElts,
// Otherwise take the unions of the known bit sets of the operands,
// taking conservative care to avoid excessive recursion.
- const unsigned PhiRecursionLimit = getAnalysisRecursionDepthLimit() - 2;
+ const unsigned PhiRecursionLimit = DepthLimit::getMaxRecursionDepth() - 2;
if (Depth < PhiRecursionLimit) {
// Skip if every incoming value references to ourself.
@@ -7568,7 +7577,8 @@ static bool programUndefinedIfUndefOrPoison(const Value *V, bool PoisonOnly);
static bool isGuaranteedNotToBeUndefOrPoison(
const Value *V, AssumptionCache *AC, const Instruction *CtxI,
const DominatorTree *DT, unsigned Depth, UndefPoisonKind Kind) {
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth(
+ DepthLimit::VTCycle::NOTUNDEFPOISON, V, Depth))
return false;
if (isa<MetadataAsValue>(V))
@@ -8904,7 +8914,7 @@ static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp,
unsigned Depth) {
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth())
return {SPF_UNKNOWN, SPNB_NA, false};
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -9323,10 +9333,12 @@ isImpliedCondICmps(CmpPredicate LPred, const Value *L0, const Value *L1,
// C1` (see discussion: D58633).
ConstantRange LCR = computeConstantRange(
L1, ICmpInst::isSigned(LPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr,
+ DepthLimit::getMaxRecursionDepth() - 1);
ConstantRange RCR = computeConstantRange(
R1, ICmpInst::isSigned(RPred), /* UseInstrInfo=*/true, /*AC=*/nullptr,
- /*CxtI=*/nullptr, /*DT=*/nullptr, getAnalysisRecursionDepthLimit() - 1);
+ /*CxtI=*/nullptr, /*DT=*/nullptr,
+ DepthLimit::getMaxRecursionDepth() - 1);
// Even if L1/R1 are not both constant, we can still sometimes deduce
// relationship from a single constant. For example X u> Y implies X != 0.
if (auto R = isImpliedCondCommonOperandWithCR(LPred, LCR, RPred, RCR))
@@ -9391,7 +9403,7 @@ isImpliedCondAndOr(const Instruction *LHS, CmpPredicate RHSPred,
LHS->getOpcode() == Instruction::Select) &&
"Expected LHS to be 'and', 'or', or 'select'.");
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Hit recursion limit");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Hit recursion limit");
// If the result of an 'or' is false, then we know both legs of the 'or' are
// false. Similarly, if the result of an 'and' is true, then we know both
@@ -9416,7 +9428,8 @@ llvm::isImpliedCondition(const Value *LHS, CmpPredicate RHSPred,
const Value *RHSOp0, const Value *RHSOp1,
const DataLayout &DL, bool LHSIsTrue, unsigned Depth) {
// Bail out when we hit the limit.
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::IMPLIED,
+ LHS, Depth))
return std::nullopt;
// A mismatch occurs when we compare a scalar cmp to a vector cmp, for
@@ -9487,7 +9500,8 @@ std::optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
return std::nullopt;
}
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::IMPLIED,
+ LHS, Depth))
return std::nullopt;
// LHS ==> (RHS1 || RHS2) if LHS ==> RHS1 or LHS ==> RHS2
@@ -9949,7 +9963,8 @@ ConstantRange llvm::computeConstantRange(const Value *V, bool ForSigned,
unsigned Depth) {
assert(V->getType()->isIntOrIntVectorTy() && "Expected integer instruction");
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth ==
+ DepthLimit::getMaxRecursionDepth(DepthLimit::VTCycle::RANGE, V, Depth))
return ConstantRange::getFull(V->getType()->getScalarSizeInBits());
if (auto *C = dyn_cast<Constant>(V))
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index b9a1b72001ded..a931aa0f800e2 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1918,7 +1918,7 @@ static bool isGuaranteedNotToBeUndefOrPoison(Register Reg,
const MachineRegisterInfo &MRI,
unsigned Depth,
UndefPoisonKind Kind) {
- if (Depth >= getAnalysisRecursionDepthLimit())
+ if (Depth >= DepthLimit::getMaxRecursionDepth())
return false;
MachineInstr *RegDef = MRI.getVRegDef(Reg);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 2fb4bfecda8aa..0e85ff7bcd19f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -5082,10 +5082,10 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) {
if (Instruction *Abs = canonicalizeAbs(I, Builder))
return Abs;
- // Otherwise, if all else failed, try to hoist the xor-by-constant:
- // (X ^ C) ^ Y --> (X ^ Y) ^ C
- // Just like we do in other places, we completely avoid the fold
- // for constantexprs, at least to avoid endless combine loop.
+ // Otherwise, if all else failed, try to hoist the xor-by-constant:
+ // (X ^ C) ^ Y --> (X ^ Y) ^ C
+ // Just like we do in other places, we completely avoid the fold
+ // for constantexprs, at least to avoid endless combine loop.
if (match(&I, m_c_Xor(m_OneUse(m_Xor(m_CombineAnd(m_Value(X),
m_Unless(m_ConstantExpr())),
m_ImmConstant(C1))),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index ffda554c84e83..cc6c0cd6e7e01 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4390,7 +4390,7 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
return true;
if (V->getType()->getScalarSizeInBits() == 1)
return true;
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth++ >= DepthLimit::getMaxRecursionDepth())
return false;
Value *X;
const Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 6fe80e01eec7d..4ea7ecb335dee 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -1533,7 +1533,7 @@ Value *InstCombinerImpl::takeLog2(Value *Op, unsigned Depth, bool AssumeNonZero,
});
// The remaining tests are all recursive, so bail out if we hit the limit.
- if (Depth++ == getAnalysisRecursionDepthLimit())
+ if (Depth++ == DepthLimit::getMaxRecursionDepth())
return nullptr;
// log2(zext X) -> zext log2(X)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 0bf05f975ffaa..1ec8fd8dc9f77 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -3641,7 +3641,7 @@ static bool matchFMulByZeroIfResultEqZero(InstCombinerImpl &IC, Value *Cmp0,
/// select condition.
static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
unsigned Depth) {
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
return false;
// Ignore the case where the select arm itself is affected. These cases
@@ -3651,9 +3651,9 @@ static bool hasAffectedValue(Value *V, SmallPtrSetImpl<Value *> &Affected,
if (auto *I = dyn_cast<Instruction>(V)) {
if (isa<PHINode>(I)) {
- if (Depth == getAnalysisRecursionDepthLimit() - 1)
+ if (Depth == DepthLimit::getMaxRecursionDepth() - 1)
return false;
- Depth = getAnalysisRecursionDepthLimit() - 2;
+ Depth = DepthLimit::getMaxRecursionDepth() - 2;
}
return any_of(I->operands(), [&](Value *Op) {
return Op->getType()->isIntOrIntVectorTy() &&
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 595fa2c5c7e26..46ac778d68d5f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -117,7 +117,7 @@ bool InstCombinerImpl::SimplifyDemandedBits(Instruction *I, unsigned OpNo,
return false;
}
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
return false;
Value *NewVal;
@@ -167,7 +167,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
const SimplifyQuery &Q,
unsigned Depth) {
assert(I != nullptr && "Null pointer of Value???");
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
Type *VTy = I->getType();
assert(
@@ -1452,7 +1452,7 @@ Value *InstCombinerImpl::SimplifyDemandedVectorElts(Value *V,
// Limit search depth.
if (Depth == SimplifyDemandedVectorEltsDepthLimit &&
- Depth >= getAnalysisRecursionDepthLimit())
+ Depth >= DepthLimit::getMaxRecursionDepth())
return nullptr;
if (!AllowMultipleUsers) {
@@ -1966,10 +1966,12 @@ static Constant *getFPClassConstant(Type *Ty, FPClassTest Mask) {
}
}
-Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
- Value *V, const FPClassTest DemandedMask, KnownFPClass &Known,
- Instruction *CxtI, unsigned Depth) {
- assert(Depth <= getAnalysisRecursionDepthLimit() && "Limit Search Depth");
+Value *InstCombinerImpl::SimplifyDemandedUseFPClass(Value *V,
+ FPClassTest DemandedMask,
+ KnownFPClass &Known,
+ Instruction *CxtI,
+ unsigned Depth) {
+ assert(Depth <= DepthLimit::getMaxRecursionDepth() && "Limit Search Depth");
Type *VTy = V->getType();
assert(Known == KnownFPClass() && "expected uninitialized state");
@@ -1977,7 +1979,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseFPClass(
if (DemandedMask == fcNone)
return isa<UndefValue>(V) ? nullptr : PoisonValue::get(VTy);
- if (Depth == getAnalysisRecursionDepthLimit())
+ if (Depth == DepthLimit::getMaxRecursionDepth())
return nullptr;
Instruction *I = dyn_cast<Instruction>(V);
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 1a245631438dc..60e5d5fb0c3d7 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2594,7 +2594,7 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
if (match(V, m_ImmConstant(C)))
return ConstantExpr::getNot(C);
- if (Depth++ >= getAnalysisRecursionDepthLimit())
+ if (Depth++ >= DepthLimit::getMaxRecursionDepth())
return nullptr;
// The rest of the cases require that we invert all uses so don't bother
@@ -2689,7 +2689,7 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
Value *NewIncomingVal =
getFreelyInvertedImpl(U.get(), /*WillInvertAllUses=*/false,
/*Builder=*/nullptr, LocalDoesConsume,
- getAnalysisRecursionDepthLimit() - 1);
+ DepthLimit::getMaxRecursionDepth() - 1);
if (NewIncomingVal == nullptr)
return nullptr;
// Make sure that we can safely erase the original PHI node.
diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index b57b82a97d743..549d4cf0f426f 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -823,9 +823,9 @@ ConstraintTy ConstraintInfo::getConstraintForSolving(CmpInst::Predicate Pred,
// with the signed <-> unsigned transfer logic.
if (CmpInst::isSigned(Pred) &&
isKnownNonNegative(Op0, DL,
- /*Depth=*/getAnalysisRecursionDepthLimit() - 1) &&
+ /*Depth=*/DepthLimit::getMaxRecursionDepth() - 1) &&
isKnownNonNegative(Op1, DL,
- /*Depth=*/getAnalysisRecursionDepthLimit() - 1))
+ /*Depth=*/DepthLimit::getMaxRecursionDepth() - 1))
Pred = ICmpInst::getUnsignedPredicate(Pred);
SmallVector<Value *> NewVariables;
@@ -899,7 +899,7 @@ void ConstraintInfo::transferToOtherSystem(
auto IsKnownNonNegative = [this](Value *V) {
return doesHold(CmpInst::ICMP_SGE, V, ConstantInt::get(V->getType(), 0)) ||
isKnownNonNegative(V, DL,
- /*Depth=*/getAnalysisRecursionDepthLimit() - 1);
+ /*Depth=*/DepthLimit::getMaxRecursionDepth() - 1);
};
// Check if we can combine facts from the signed and unsigned systems to
// derive additional facts.
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 320b79203c0b3..26a2ba7b58108 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -631,8 +631,27 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
ConstantOffset = CI->getValue();
} else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
// Trace into subexpressions for more hoisting opportunities.
- if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
- ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+ BinaryOperator *TraceInto = BO;
+ if (BO->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(BO->getOperand(1))) {
+ KnownBits LHSKnown(BO->getOperand(0)->getType()->getScalarSizeInBits());
+ KnownBits RHSKnown(BO->getOperand(1)->getType()->getScalarSizeInBits());
+ computeKnownBitsExhaustive(BO->getOperand(0), LHSKnown, DL);
+ computeKnownBitsExhaustive(BO->getOperand(1), RHSKnown, DL);
+ if (KnownBits::haveNoCommonBitsSet(LHSKnown, RHSKnown)) {
+ IRBuilder<> Builder(BO);
+ TraceInto = cast<BinaryOperator>(
+ Builder.CreateOr(BO->getOperand(0), BO->getOperand(1)));
+ cast<PossiblyDisjointInst>(TraceInto)->setIsDisjoint(true);
+ BO->replaceAllUsesWith(TraceInto);
+ BO->eraseFromParent();
+ }
+ }
+
+ if (CanTraceInto(SignExtended, ZeroExtended, TraceInto, NonNegative)) {
+ ConstantOffset =
+ findInEitherOperand(TraceInto, SignExtended, ZeroExtended);
+ }
} else if (isa<TruncInst>(V)) {
ConstantOffset =
find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll
new file mode 100644
index 0000000000000..3b0d952396425
--- /dev/null
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/xor-or-disjoint.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --passes=separate-const-offset-from-gep < %s | FileCheck %s
+
+@global_smem = external addrspace(3) global [0 x i8], align 16
+
+define amdgpu_kernel void @matmul_kernel(ptr addrspace(1) %inptr, <4 x i32> %data0, <4 x i32> %data1, i1 %cond) {
+entry:
+ %28 = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %29 = and i32 %28, 8
+ %.not = icmp eq i32 %29, 0
+ %30 = and i32 %28, 16
+ %31 = icmp eq i32 %30, 0
+ %32 = and i32 %28, 32
+ %33 = icmp eq i32 %32, 0
+ %34 = and i32 %28, 256
+ %53 = shl i32 %28, 3
+ %54 = and i32 %53, 56
+ %121 = select i1 %.not, i32 0, i32 72
+ %122 = select i1 %31, i32 0, i32 144
+ %123 = or disjoint i32 %121, %122
+ %124 = select i1 %33, i32 0, i32 288
+ %125 = or disjoint i32 %123, %124
+ %126 = xor i32 %125, %54
+ %127 = and i32 %53, 1536
+ %128 = or disjoint i32 %127, %126
+ %129 = shl nuw nsw i32 %34, 3
+ %130 = or disjoint i32 %128, %129
+ %132 = xor i32 %130, 4096
+ %133 = getelementptr inbounds nuw half, ptr addrspace(3) @global_smem, i32 %132
+ br i1 %cond, label %s1, label %s2
+
+s1:
+ store <4 x i32> %data0, ptr addrspace(3) %133, align 16
+ br label %end
+
+s2:
+ br label %end
+
+end:
+ ret void
+}
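Note on the GEP test: the masks alone guarantee that bit 12 of %130 is clear
(%130 is assembled from bits 3..11 of the shifted workitem id), but proving
that requires chasing a chain deeper than the default cap of 6 allows. With
computeKnownBitsExhaustive(), SeparateConstOffsetFromGEP can rewrite
`%132 = xor i32 %130, 4096` to `or disjoint i32 %130, 4096`, which it already
knows how to trace into, so the constant 4096 can be split from the
getelementptr and folded into the addressing computation.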