[llvm] 305cf0e - [InstCombine] Make foldCmpLoadFromIndexedGlobal() GEP-type independent (#157089)
via llvm-commits
llvm-commits@lists.llvm.org
Mon Sep 8 03:54:28 PDT 2025
Author: Nikita Popov
Date: 2025-09-08T12:54:24+02:00
New Revision: 305cf0e912529bfd847429f33ae09756aeb1813a
URL: https://github.com/llvm/llvm-project/commit/305cf0e912529bfd847429f33ae09756aeb1813a
DIFF: https://github.com/llvm/llvm-project/commit/305cf0e912529bfd847429f33ae09756aeb1813a.diff
LOG: [InstCombine] Make foldCmpLoadFromIndexedGlobal() GEP-type independent (#157089)
foldCmpLoadFromIndexedGlobal() currently checks that the global type,
the GEP type and the load type match in certain ways. Replace this with
generic logic based on offsets.
This is a reboot of https://github.com/llvm/llvm-project/pull/67093.
This PR is less ambitious: it requires that the constant offset is
smaller than the stride, which avoids the additional complexity of that
PR.
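For illustration, here is the kind of pattern that is now handled (a
minimal sketch in the spirit of the new load-cmp.ll tests below, with
hypothetical names; the exact fold InstCombine produces may differ): an
i16 load through an i16-typed GEP into a [4 x i32] constant global,
previously rejected because the types do not match, now foldable based
purely on the computed offsets.

@g = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

define i1 @halfword_is_zero(i32 %idx) {
  ; Byte offset is 2 * %idx; neither the GEP source element type (i16)
  ; nor the load type matches the [4 x i32] element type of the global.
  %gep = getelementptr inbounds i16, ptr @g, i32 %idx
  %val = load i16, ptr %gep
  %cmp = icmp eq i16 %val, 0
  ret i1 %cmp
}

Running this through opt -passes=instcombine -S should now reduce the
compare to a shift and mask test against a constant bitmask, as the new
CHECK lines below show.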
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/load-cmp.ll
llvm/test/Transforms/InstCombine/opaque-ptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 90feddf6dcfe1..01b0da3469c18 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -112,73 +112,42 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
- if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- !GV->getValueType()->isArrayTy() || !GV->isConstant() ||
- !GV->hasDefinitiveInitializer())
- return nullptr;
-
- Type *GEPSrcEltTy = GEP->getSourceElementType();
- if (GEPSrcEltTy->isArrayTy())
- GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
- if (GV->getValueType()->getArrayElementType() != GEPSrcEltTy)
+ if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
- if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ TypeSize GlobalSize = DL.getTypeAllocSize(Init->getType());
+ Type *EltTy = LI->getType();
+ TypeSize EltSize = DL.getTypeStoreSize(EltTy);
+ if (EltSize.isScalable())
return nullptr;
- uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
- // Don't blow up on huge arrays.
- if (ArrayElementCount > MaxArraySizeForCombine)
+ unsigned IndexBW = DL.getIndexTypeSizeInBits(GEP->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(IndexBW, 0);
+ if (!GEP->collectOffset(DL, IndexBW, VarOffsets, ConstOffset) ||
+ VarOffsets.size() != 1 || IndexBW > 64)
return nullptr;
- // There are many forms of this optimization we can handle, for now, just do
- // the simple index into a single-dimensional array or elements of equal size.
- //
- // Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
- // Or: GEP i8 GV, Idx
-
- unsigned GEPIdxOp = 1;
- if (GEP->getSourceElementType()->isArrayTy()) {
- GEPIdxOp = 2;
- if (!match(GEP->getOperand(1), m_ZeroInt()))
- return nullptr;
- }
- if (GEP->getNumOperands() < GEPIdxOp + 1 ||
- isa<Constant>(GEP->getOperand(GEPIdxOp)))
+ Value *Idx = VarOffsets.front().first;
+ const APInt &Stride = VarOffsets.front().second;
+ // If the index type is non-canonical, wait for it to be canonicalized.
+ if (Idx->getType()->getScalarSizeInBits() != IndexBW)
return nullptr;
- // Check that indices after the variable are constants and in-range for the
- // type they index. Collect the indices. This is typically for arrays of
- // structs.
- SmallVector<unsigned, 4> LaterIndices;
-
- Type *EltTy = Init->getType()->getArrayElementType();
- for (unsigned i = GEPIdxOp + 1, e = GEP->getNumOperands(); i != e; ++i) {
- ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!Idx)
- return nullptr; // Variable index.
-
- uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal)
- return nullptr; // Too large array index.
-
- if (StructType *STy = dyn_cast<StructType>(EltTy))
- EltTy = STy->getElementType(IdxVal);
- else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements())
- return nullptr;
- EltTy = ATy->getElementType();
- } else {
- return nullptr; // Unknown type.
- }
+ // Allow an additional constant offset, but only within the stride.
+ if (!ConstOffset.ult(Stride))
+ return nullptr;
- LaterIndices.push_back(IdxVal);
- }
+ // Don't handle overlapping loads for now.
+ if (!Stride.uge(EltSize.getFixedValue()))
+ return nullptr;
- Value *Idx = GEP->getOperand(GEPIdxOp);
- // If the index type is non-canonical, wait for it to be canonicalized.
- if (Idx->getType() != DL.getIndexType(GEP->getType()))
+ // Don't blow up on huge arrays.
+ uint64_t ArrayElementCount =
+ divideCeil((GlobalSize.getFixedValue() - ConstOffset.getZExtValue()),
+ Stride.getZExtValue());
+ if (ArrayElementCount > MaxArraySizeForCombine)
return nullptr;
enum { Overdefined = -3, Undefined = -2 };
@@ -211,18 +180,12 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
- for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
- Constant *Elt = Init->getAggregateElement(i);
+ APInt Offset = ConstOffset;
+ for (unsigned i = 0, e = ArrayElementCount; i != e; ++i, Offset += Stride) {
+ Constant *Elt = ConstantFoldLoadFromConst(Init, EltTy, Offset, DL);
if (!Elt)
return nullptr;
- // If this is indexing an array of structures, get the structure element.
- if (!LaterIndices.empty()) {
- Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
- if (!Elt)
- return nullptr;
- }
-
// If the element is masked, handle it.
if (AndCst) {
Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
@@ -309,19 +272,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.
- // If inbounds keyword is not present, Idx * ElementSize can overflow.
- // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+ // If inbounds keyword is not present, Idx * Stride can overflow.
+ // Let's assume that Stride is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
- unsigned ElementSize =
- DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value *Idx) {
- if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+ if (!GEP->isInBounds() && Stride.countr_zero() != 0) {
Value *Mask = Constant::getAllOnesValue(Idx->getType());
- Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
+ Mask = Builder.CreateLShr(Mask, Stride.countr_zero());
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 2a2c74dc09e2f..79faefbd5df56 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -371,3 +371,186 @@ define i1 @pr93017(i64 %idx) {
%cmp = icmp ne ptr %v, null
ret i1 %cmp
}
+
+@g_i32_lo = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+
+; Mask is 0b10101010
+define i1 @load_vs_array_type_mismatch1(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch1(
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 170
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds i16, ptr @g_i32_lo, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@g_i32_hi = internal constant [4 x i32] [i32 u0x00010000, i32 u0x00020000, i32 u0x00030000, i32 u0x00040000]
+
+; Mask is 0b01010101
+define i1 @load_vs_array_type_mismatch2(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch2(
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 85
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds i16, ptr @g_i32_hi, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@g_i16_1 = internal constant [8 x i16] [i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1, i16 0]
+
+; idx == 1 || idx == 3
+define i1 @load_vs_array_type_mismatch_offset1(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch_offset1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds {i16, i16}, ptr @g_i16_1, i32 %idx, i32 1
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@g_i16_2 = internal constant [8 x i16] [i16 1, i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1]
+
+; idx == 0 || idx == 2
+define i1 @load_vs_array_type_mismatch_offset2(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch_offset2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds {i16, i16}, ptr @g_i16_2, i32 %idx, i32 1
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+define i1 @offset_larger_than_stride(i32 %idx) {
+; CHECK-LABEL: @offset_larger_than_stride(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i16], ptr @g_i16_1, i32 1, i32 [[TMP1:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr [2 x i16], ptr @g_i16_1, i64 1, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+define i1 @load_size_larger_stride(i32 %idx) {
+; CHECK-LABEL: @load_size_larger_stride(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g_i16_1, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr i8, ptr @g_i16_1, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@CG_MESSY = constant [9 x i32] [i32 1, i32 7, i32 -1, i32 5, i32 4, i32 1, i32 1, i32 5, i32 4]
+
+define i1 @cmp_load_constant_array_messy(i32 %x){
+; CHECK-LABEL: @cmp_load_constant_array_messy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0:%.*]], 1073741823
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 373
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[COND]]
+;
+
+entry:
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp slt i32 %isOK, 5
+ ret i1 %cond
+}
+
+define i1 @cmp_diff_load_constant_array_messy0(i32 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1:%.*]], 1073741823
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 1, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 373
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond = icmp slt i16 %isOK, 5
+ ret i1 %cond
+}
+
+; Load size larger than stride currently not supported.
+define i1 @cmp_diff_load_constant_array_messy1(i32 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1:%.*]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %isOK_ptr = getelementptr i6, ptr @CG_MESSY, i32 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond = icmp slt i16 %isOK, 5
+ ret i1 %cond
+}
+
+define i1 @cmp_load_constant_array_variable_icmp(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_load_constant_array_variable_icmp(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG_MESSY, i32 [[X:%.*]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[COND]]
+;
+entry:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG_MESSY, i32 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp ult i32 %isOK, %y
+ ret i1 %cond
+}
+
+@CG_CLEAR = constant [10 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9]
+
+; Offsets not supported if negative or larger than stride.
+define i1 @cmp_load_constant_additional_positive_offset(i32 %x) {
+; CHECK-LABEL: @cmp_load_constant_additional_positive_offset(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 [[X:%.*]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND]]
+;
+entry:
+ %isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp ult i32 %isOK, 5
+ ret i1 %cond
+}
+
+define i1 @cmp_load_constant_additional_negative_offset(i32 %x) {
+; CHECK-LABEL: @cmp_load_constant_additional_negative_offset(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 [[X:%.*]], i32 -5
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND]]
+;
+entry:
+ %isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 %x, i32 -5
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp ult i32 %isOK, 5
+ ret i1 %cond
+}
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 047698102a695..b54170ed3a2e8 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -543,10 +543,7 @@ define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr @ary, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep