[clang-tools-extra] [InstCombine] Fold icmp(constants[x]) when the range of x is given (PR #67093)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 22 00:02:36 PDT 2023
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/67093
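[Editor's note] In short, the patch generalizes foldCmpLoadFromIndexedGlobal so that a
compare of a load from a constant global folds into a compare on the index itself
whenever the index's range is known. A minimal standalone sketch of the idea in plain
C++ rather than LLVM IR (the table mirrors @CG from the tests below; the helper names
are hypothetical, nothing here is LLVM API):

  #include <cassert>
  #include <cstdint>

  // Mirrors @CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4] from the tests.
  static const int32_t CG[4] = {1, 2, 3, 4};

  // The compare being folded: "icmp ult (load CG[x]), 3", with x known to be
  // in [0, 2) from a dominating branch.
  static bool direct(uint64_t x) { return (uint32_t)CG[x] < 3u; }

  // What the fold emits conceptually: scanning the table shows the compare
  // holds exactly for indices 0 and 1, so it becomes a range check on x.
  static bool folded(uint64_t x) { return x < 2; }

  int main() {
    // Here the equivalence happens to hold for every in-bounds index.
    for (uint64_t x = 0; x < 4; ++x)
      assert(direct(x) == folded(x));
    return 0;
  }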
From e7314709fc812736a357b50654b08ef722fb89a4 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 31 Jul 2023 17:10:45 +0800
Subject: [PATCH 1/2] [InstCombine] Tests for simplifying icmp(constants[x])
---
llvm/test/Transforms/InstCombine/load-cmp.ll | 146 +++++++++++++++++++
1 file changed, 146 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 7fb6e7a3a37b388..4c74e73ee991c93 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -334,3 +334,149 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
%r = icmp eq i32 %q, 9
ret i1 %r
}
+
+
+ at CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+
+define i1 @cmp_load_constant_array0(i64 %x){
+; CHECK-LABEL: @cmp_load_constant_array0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp ult i64 %x, 2
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp ult i32 %isOK, 3
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_load_constant_array1(i64 %x){
+; CHECK-LABEL: @cmp_load_constant_array1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp ult i64 %x, 2
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp ugt i32 %isOK, 10
+ ret i1 %cond_inferred
+}
+
+ at CG_MESSY = constant [9 x i32] [i32 1, i32 7, i32 -1, i32 5, i32 4, i32 1, i32 1, i32 5, i32 4]
+
+define i1 @cmp_load_constant_array_messy(i64 %x){
+; CHECK-LABEL: @cmp_load_constant_array_messy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[X:%.*]], 6
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp slt i64 %x, 6
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp slt i32 %isOK, 5
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP1]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond_inferred = icmp slt i16 %isOK, 5
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_diff_load_constant_array_messy1(i64 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond_inferred = icmp slt i16 %isOK, 5
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_load_constant_array_fail0(i64 %x, i32 %y) {
+; CHECK-LABEL: @cmp_load_constant_array_fail0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[X:%.*]], 3
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp ult i64 %x, 3
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp ult i32 %isOK, %y
+ ret i1 %cond_inferred
+}
+
From 17010469ac7738b60497d3c5d196beda9bf84ec1 Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 31 Jul 2023 17:17:07 +0800
Subject: [PATCH 2/2] [InstCombine] Fold icmp(constants[x]) when the range of
 x is given
---
.../Transforms/InstCombine/InstCombiner.h | 2 +
.../InstCombine/InstCombineCompares.cpp | 201 ++++++++++--------
.../InstCombine/InstructionCombining.cpp | 2 +
llvm/test/Transforms/InstCombine/load-cmp.ll | 71 +++----
.../test/Transforms/InstCombine/opaque-ptr.ll | 10 +-
5 files changed, 150 insertions(+), 136 deletions(-)
diff --git a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
index 6b5a7c3634dcb45..b643522d1edb60d 100644
--- a/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
+++ b/llvm/include/llvm/Transforms/InstCombine/InstCombiner.h
@@ -52,6 +52,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
public:
/// Maximum size of array considered when transforming.
uint64_t MaxArraySizeForCombine = 0;
+ /// Maximum bytes of data considered when transforming.
+ uint64_t MaxDataSizeForCombine = 0;
/// An IRBuilder that automatically inserts new instructions into the
/// worklist.
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 06a999da0671f40..9968cf13d4d6878 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -12,6 +12,7 @@
#include "InstCombineInternal.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CaptureTracking.h"
@@ -20,10 +21,12 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/ConstantRange.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
@@ -108,55 +111,34 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
- if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- GV->getValueType() != GEP->getSourceElementType() ||
- !GV->isConstant() || !GV->hasDefinitiveInitializer())
+ if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
- if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
- return nullptr;
+ uint64_t DataSize = DL.getTypeAllocSize(Init->getType());
- uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
// Don't blow up on huge arrays.
- if (ArrayElementCount > MaxArraySizeForCombine)
- return nullptr;
-
- // There are many forms of this optimization we can handle, for now, just do
- // the simple index into a single-dimensional array.
- //
- // Require: GEP GV, 0, i {{, constant indices}}
- if (GEP->getNumOperands() < 3 ||
- !isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
- isa<Constant>(GEP->getOperand(2)))
+ if (DataSize > MaxDataSizeForCombine)
return nullptr;
- // Check that indices after the variable are constants and in-range for the
- // type they index. Collect the indices. This is typically for arrays of
- // structs.
- SmallVector<unsigned, 4> LaterIndices;
+ Type *LoadedTy = LI->getType();
+ uint64_t LoadedTySize = DL.getTypeAllocSize(LoadedTy);
+ uint64_t PtrBitwidth = DL.getIndexSizeInBits(GEP->getPointerAddressSpace());
+ Type *PtrIdxTy = DL.getIndexType(GEP->getType());
- Type *EltTy = Init->getType()->getArrayElementType();
- for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
- ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!Idx) return nullptr; // Variable index.
-
- uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal) return nullptr; // Too large array index.
-
- if (StructType *STy = dyn_cast<StructType>(EltTy))
- EltTy = STy->getElementType(IdxVal);
- else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements()) return nullptr;
- EltTy = ATy->getElementType();
- } else {
- return nullptr; // Unknown type.
- }
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(PtrBitwidth, 0);
+ GEP->collectOffset(GEP->getModule()->getDataLayout(), PtrBitwidth,
+ VariableOffsets, ConstantOffset);
- LaterIndices.push_back(IdxVal);
- }
+ // There are many forms of this optimization we can handle; for now,
+ // restrict to a single variable index.
+ if (VariableOffsets.size() != 1)
+ return nullptr;
+
+ // Possible TODO: fold cmp(A[a*x + b*y + ... + C], Rhs) into
+ // cmp(a*x + b*y + ... + C, IndexRhs).
enum { Overdefined = -3, Undefined = -2 };
// Variables for our state machines.
@@ -185,18 +167,36 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
+ Value *Idx = nullptr;
+
// Scan the array and see if one of our patterns matches.
- Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
- for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
- Constant *Elt = Init->getAggregateElement(i);
- if (!Elt) return nullptr;
-
- // If this is indexing an array of structures, get the structure element.
- if (!LaterIndices.empty()) {
- Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
- if (!Elt)
- return nullptr;
- }
+ Constant *ComparedRHS = cast<Constant>(ICI.getOperand(1));
+ APInt OffsetStep = VariableOffsets.front().second;
+ // The byte stride advanced by one unit of the variable index.
+ uint64_t OffsetStepZExt = OffsetStep.getZExtValue();
+ // Constant byte offset from the start of the global at which scanning begins.
+ int64_t BeginOffset = ConstantOffset.getSExtValue();
+
+ // Reduce BeginOffset to the smallest offset >= 0 that is congruent to it
+ // modulo the step.
+ if (BeginOffset % (int64_t)OffsetStepZExt == 0)
+ BeginOffset = 0;
+ else if (BeginOffset < 0)
+ BeginOffset += (-BeginOffset / OffsetStepZExt + 1) * OffsetStepZExt;
+ else if (BeginOffset > 0)
+ BeginOffset -= (BeginOffset / OffsetStepZExt) * OffsetStepZExt;
+
+ uint64_t ElementCountToTraverse = (DataSize - BeginOffset) / OffsetStepZExt;
+
+ // Don't traverse too many times.
+ if (ElementCountToTraverse > MaxArraySizeForCombine)
+ return nullptr;
+
+ for (uint64_t i = 0; i < ElementCountToTraverse; ++i) {
+ APInt CurOffset(i * OffsetStep + BeginOffset);
+ Constant *Elt = ConstantFoldLoadFromConstPtr(GV, LoadedTy, CurOffset, DL);
+
+ if (!Elt)
+ return nullptr;
// If the element is masked, handle it.
if (AndCst) {
@@ -207,21 +207,22 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, DL, &TLI);
+ ComparedRHS, DL, &TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
// undef in the middle of the range.
- if (TrueRangeEnd == (int)i-1)
+ if (TrueRangeEnd == (int)i - 1)
TrueRangeEnd = i;
- if (FalseRangeEnd == (int)i-1)
+ if (FalseRangeEnd == (int)i - 1)
FalseRangeEnd = i;
continue;
}
// If we can't compute the result for any of the elements, we have to give
// up evaluating the entire conditional.
- if (!isa<ConstantInt>(C)) return nullptr;
+ if (!isa<ConstantInt>(C))
+ return nullptr;
// Otherwise, we know if the comparison is true or false for this element,
// update our state machines.
@@ -231,7 +232,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
if (IsTrueForElt) {
// Update the TrueElement state machine.
if (FirstTrueElement == Undefined)
- FirstTrueElement = TrueRangeEnd = i; // First true element.
+ FirstTrueElement = TrueRangeEnd = i; // First true element.
else {
// Update double-compare state machine.
if (SecondTrueElement == Undefined)
@@ -240,7 +241,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SecondTrueElement = Overdefined;
// Update range state machine.
- if (TrueRangeEnd == (int)i-1)
+ if (TrueRangeEnd == (int)i - 1)
TrueRangeEnd = i;
else
TrueRangeEnd = Overdefined;
@@ -257,7 +258,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
SecondFalseElement = Overdefined;
// Update range state machine.
- if (FalseRangeEnd == (int)i-1)
+ if (FalseRangeEnd == (int)i - 1)
FalseRangeEnd = i;
else
FalseRangeEnd = Overdefined;
@@ -267,7 +268,6 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// If this element is in range, update our magic bitvector.
if (i < 64 && IsTrueForElt)
MagicBitvector |= 1ULL << i;
-
// If all of our states become overdefined, bail out early. Since the
// predicate is expensive, only check it every 8 elements. This is only
// really useful for really huge arrays.
@@ -279,40 +279,62 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.
- Value *Idx = GEP->getOperand(2);
-
- // If the index is larger than the pointer offset size of the target, truncate
- // the index down like the GEP would do implicitly. We don't have to do this
- // for an inbounds GEP because the index can't be out of range.
- if (!GEP->isInBounds()) {
- Type *PtrIdxTy = DL.getIndexType(GEP->getType());
- unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
- if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
- Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
- }
- // If inbounds keyword is not present, Idx * ElementSize can overflow.
- // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+ // If the inbounds keyword is not present, Idx * OffsetStep can overflow.
+ // Let's assume that OffsetStep is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
- unsigned ElementSize =
- DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value *Idx) {
- if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+ if (!GEP->isInBounds() && llvm::countr_zero(OffsetStepZExt) != 0) {
Value *Mask = ConstantInt::get(Idx->getType(), -1);
- Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
+ Mask = Builder.CreateLShr(Mask, llvm::countr_zero(OffsetStepZExt));
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
};
+ // Build the index expression lazily.
+ auto LazyGetIndex = [&](Value *CurIdx) {
+ if (CurIdx)
+ return CurIdx;
+
+ // Initial bias for the index. For example, when we fold C[x + 3] into
+ // x < 2, we actually regard it as (x + 3) < 5, i.e. x < 5 - 3.
+ Value *Idx =
+ ConstantInt::get(PtrIdxTy->getContext(),
+ (BeginOffset - ConstantOffset).sdiv(OffsetStepZExt));
+ for (auto [Var, Coefficient] : VariableOffsets) {
+ uint64_t VarBitWidth = Var->getType()->getScalarSizeInBits();
+ uint64_t IdxBitWidth = Idx->getType()->getScalarSizeInBits();
+ Type *WiderType =
+ VarBitWidth > IdxBitWidth ? Var->getType() : Idx->getType();
+
+ Var = Builder.CreateSExtOrTrunc(Var, WiderType);
+ Idx = Builder.CreateSExtOrTrunc(Idx, WiderType);
+ APInt MinCoeffi = Coefficient.sdiv(OffsetStep)
+ .sextOrTrunc(WiderType->getScalarSizeInBits());
+ Value *Mul =
+ Builder.CreateMul(Var, ConstantInt::get(WiderType, MinCoeffi));
+ Idx = Builder.CreateAdd(Idx, Mul);
+ }
+
+ // If the index is larger than the pointer offset size of the target,
+ // truncate the index down like the GEP would do implicitly. We don't have
+ // to do this for an inbounds GEP because the index can't be out of range.
+ if (!GEP->isInBounds() &&
+ Idx->getType()->getScalarSizeInBits() > PtrBitwidth)
+ Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
+
+ return MaskIdx(Idx);
+ };
+
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// None true -> false.
if (FirstTrueElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getFalse());
@@ -333,7 +355,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// If the comparison is only false for one or two elements, emit direct
// comparisons.
if (SecondFalseElement != Overdefined) {
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// None false -> true.
if (FirstFalseElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getTrue());
@@ -346,7 +368,8 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// False for two elements -> 'i != 47 & i != 72'.
Value *C1 = Builder.CreateICmpNE(Idx, FirstFalseIdx);
- Value *SecondFalseIdx = ConstantInt::get(Idx->getType(),SecondFalseElement);
+ Value *SecondFalseIdx =
+ ConstantInt::get(Idx->getType(), SecondFalseElement);
Value *C2 = Builder.CreateICmpNE(Idx, SecondFalseIdx);
return BinaryOperator::CreateAnd(C1, C2);
}
@@ -355,7 +378,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// where it is true, emit the range check.
if (TrueRangeEnd != Overdefined) {
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
@@ -363,23 +386,23 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
Idx = Builder.CreateAdd(Idx, Offs);
}
- Value *End = ConstantInt::get(Idx->getType(),
- TrueRangeEnd-FirstTrueElement+1);
+ Value *End =
+ ConstantInt::get(PtrIdxTy, TrueRangeEnd - FirstTrueElement + 1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
// False range check.
if (FalseRangeEnd != Overdefined) {
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
if (FirstFalseElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
Idx = Builder.CreateAdd(Idx, Offs);
}
- Value *End = ConstantInt::get(Idx->getType(),
- FalseRangeEnd-FirstFalseElement);
+ Value *End =
+ ConstantInt::get(Idx->getType(), FalseRangeEnd - FirstFalseElement);
return new ICmpInst(ICmpInst::ICMP_UGT, Idx, End);
}
@@ -392,13 +415,15 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Look for an appropriate type:
// - The type of Idx if the magic fits
// - The smallest fitting legal type
- if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
- Ty = Idx->getType();
+
+ if (ElementCountToTraverse <= PtrIdxTy->getIntegerBitWidth())
+ Ty = PtrIdxTy;
else
- Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ Ty = DL.getSmallestLegalIntType(Init->getContext(),
+ ElementCountToTraverse);
if (Ty) {
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
Value *V = Builder.CreateIntCast(Idx, Ty, false);
V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
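[Editor's note] One subtlety in the InstCombineCompares.cpp changes above is the
normalization of BeginOffset into [0, OffsetStep) before the scan. A standalone
re-derivation of that arithmetic (hypothetical helper name; plain signed C++
integers standing in for the APInt/int64_t mix in the patch):

  #include <cassert>
  #include <cstdint>

  // Reduce a (possibly negative) constant byte offset to the smallest
  // offset >= 0 in the same residue class modulo Step, i.e. the first byte
  // position the variable index can actually address inside the global.
  static int64_t normalizeBeginOffset(int64_t Begin, int64_t Step) {
    if (Begin % Step == 0) // signed modulo is the intended semantics
      return 0;
    if (Begin < 0)
      return Begin + (-Begin / Step + 1) * Step;
    return Begin - (Begin / Step) * Step; // == Begin % Step for Begin > 0
  }

  int main() {
    assert(normalizeBeginOffset(-5, 4) == 3);  // -5 == 3 (mod 4)
    assert(normalizeBeginOffset(-12, 4) == 0); // exactly divisible
    assert(normalizeBeginOffset(5, 4) == 1);
    assert(normalizeBeginOffset(0, 4) == 0);
    return 0;
  }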
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 5c5d7f695fa356e..86c97873e9c2f1c 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -4332,6 +4332,8 @@ static bool combineInstructionsOverFunction(
InstCombinerImpl IC(Worklist, Builder, F.hasMinSize(), AA, AC, TLI, TTI, DT,
ORE, BFI, PSI, DL, LI);
IC.MaxArraySizeForCombine = MaxArraySize;
+ IC.MaxDataSizeForCombine = MaxArraySize * 8;
+
bool MadeChangeInThisIteration = IC.prepareWorklist(F, RPOT);
MadeChangeInThisIteration |= IC.run();
if (!MadeChangeInThisIteration)
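[Editor's note] For context on the new knob: MaxDataSizeForCombine bounds the
global's total allocation size in bytes, while the existing MaxArraySizeForCombine
now bounds the number of elements actually traversed at the given stride. With the
default instcombine-maxarray-size of 1024, this permits scanning globals of up to
8 KiB. A toy sketch of the two guards (hypothetical names, not the patch's exact
code):

  #include <cassert>
  #include <cstdint>

  // Mirrors the two early-outs in foldCmpLoadFromIndexedGlobal.
  static bool withinBudget(uint64_t DataSize, uint64_t ElementsToTraverse,
                           uint64_t MaxArraySize = 1024) {
    uint64_t MaxDataSize = MaxArraySize * 8; // the factor chosen in this patch
    return DataSize <= MaxDataSize && ElementsToTraverse <= MaxArraySize;
  }

  int main() {
    assert(withinBudget(36, 9));       // @CG_MESSY: 9 x i32 = 36 bytes
    assert(!withinBudget(1 << 20, 9)); // a 1 MiB global is rejected outright
    return 0;
  }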
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 4c74e73ee991c93..bb4fe8977fecdc0 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -215,10 +215,7 @@ define i1 @test10_struct(i32 %x) {
define i1 @test10_struct_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_noinbounds(
-; CHECK-NEXT: [[P:%.*]] = getelementptr [[FOO:%.*]], ptr @GS, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[P]], align 8
-; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 9
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%p = getelementptr %Foo, ptr @GS, i32 %x, i32 0
%q = load i32, ptr %p
@@ -252,11 +249,7 @@ define i1 @test10_struct_i64(i64 %x){
define i1 @test10_struct_noinbounds_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_noinbounds_i16(
-; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT: [[P:%.*]] = getelementptr [[FOO:%.*]], ptr @GS, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[P]], align 8
-; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%p = getelementptr %Foo, ptr @GS, i16 %x, i32 0
%q = load i32, ptr %p
@@ -266,7 +259,8 @@ define i1 @test10_struct_noinbounds_i16(i16 %x) {
define i1 @test10_struct_arr(i32 %x) {
; CHECK-LABEL: @test10_struct_arr(
-; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -3
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2
@@ -277,8 +271,8 @@ define i1 @test10_struct_arr(i32 %x) {
define i1 @test10_struct_arr_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435455
-; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], 268435453
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i32 %x, i32 2
@@ -289,7 +283,8 @@ define i1 @test10_struct_arr_noinbounds(i32 %x) {
define i1 @test10_struct_arr_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_arr_i16(
-; CHECK-NEXT: [[R:%.*]] = icmp ne i16 [[X:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i16 [[X:%.*]], -3
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i16 0, i16 %x, i32 2
@@ -300,8 +295,8 @@ define i1 @test10_struct_arr_i16(i16 %x) {
define i1 @test10_struct_arr_i64(i64 %x) {
; CHECK-LABEL: @test10_struct_arr_i64(
-; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295
-; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967293
+; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr inbounds [4 x %Foo], ptr @GStructArr, i64 0, i64 %x, i32 2
@@ -313,8 +308,8 @@ define i1 @test10_struct_arr_i64(i64 %x) {
define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435455
-; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 1
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 268435453
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP2]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i16 %x, i32 2
@@ -325,8 +320,8 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
-; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435455
-; CHECK-NEXT: [[R:%.*]] = icmp ne i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 268435453
+; CHECK-NEXT: [[R:%.*]] = icmp eq i64 [[TMP1]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%p = getelementptr [4 x %Foo], ptr @GStructArr, i32 0, i64 %x, i32 2
@@ -338,6 +333,7 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
@CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+; TODO: Fold it globally.
define i1 @cmp_load_constant_array0(i64 %x){
; CHECK-LABEL: @cmp_load_constant_array0(
; CHECK-NEXT: entry:
@@ -346,10 +342,8 @@ define i1 @cmp_load_constant_array0(i64 %x){
; CHECK: case2:
; CHECK-NEXT: ret i1 false
; CHECK: case1:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3
+; CHECK-NEXT: [[TMP0:%.*]] = and i64 [[X]], 4294967294
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp eq i64 [[TMP0]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
entry:
@@ -374,11 +368,7 @@ define i1 @cmp_load_constant_array1(i64 %x){
; CHECK: case2:
; CHECK-NEXT: ret i1 false
; CHECK: case1:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10
-; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+; CHECK-NEXT: ret i1 false
;
entry:
%cond = icmp ult i64 %x, 2
@@ -405,9 +395,10 @@ define i1 @cmp_load_constant_array_messy(i64 %x){
; CHECK-NEXT: ret i1 false
; CHECK: case1:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1073741823
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 373, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
entry:
@@ -427,9 +418,10 @@ case1:
define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP1]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 373, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
%isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
@@ -440,13 +432,13 @@ define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
define i1 @cmp_diff_load_constant_array_messy1(i64 %x){
; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 66160388071, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
-%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
+ %isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
%isOK = load i16, ptr %isOK_ptr
%cond_inferred = icmp slt i16 %isOK, 5
ret i1 %cond_inferred
@@ -479,4 +471,3 @@ case1:
%cond_inferred = icmp ult i32 %isOK, %y
ret i1 %cond_inferred
}
-
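[Editor's note] The "lshr i32 373" in the CHECK lines above is the existing "magic
bitvector" emission path, now driven by byte offsets: bit i of the constant records
whether the compare holds for element i, so the whole table lookup collapses to one
shift and mask. A small sketch reconstructing that constant (plain C++, for
illustration only):

  #include <cassert>
  #include <cstdint>

  // Mirrors @CG_MESSY = constant [9 x i32] [1, 7, -1, 5, 4, 1, 1, 5, 4].
  static const int32_t CG_MESSY[9] = {1, 7, -1, 5, 4, 1, 1, 5, 4};

  int main() {
    // Build the bitvector the way the pass does: bit i is set iff the folded
    // compare (here: element < 5, signed) is true for element i.
    uint64_t Magic = 0;
    for (uint64_t I = 0; I < 9; ++I)
      if (CG_MESSY[I] < 5)
        Magic |= 1ULL << I;
    assert(Magic == 373); // 0b101110101, the constant in the CHECK lines

    // The emitted IR then evaluates the compare as ((Magic >> x) & 1) != 0.
    for (uint64_t X = 0; X < 9; ++X)
      assert((((Magic >> X) & 1) != 0) == (CG_MESSY[X] < 5));
    return 0;
  }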
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4f12fa45e9ecadd..630761d6989ac7d 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -475,10 +475,7 @@ define i1 @cmp_load_gep_global(i64 %idx) {
define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_load_type(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i8], ptr @ary, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i8], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep
@@ -488,10 +485,7 @@ define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i16], ptr @ary, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep