[llvm] 305cf0e - [InstCombine] Make foldCmpLoadFromIndexedGlobal() GEP-type independent (#157089)
via llvm-commits
llvm-commits@lists.llvm.org
Mon Sep 8 03:54:28 PDT 2025
Author: Nikita Popov
Date: 2025-09-08T12:54:24+02:00
New Revision: 305cf0e912529bfd847429f33ae09756aeb1813a
URL: https://github.com/llvm/llvm-project/commit/305cf0e912529bfd847429f33ae09756aeb1813a
DIFF: https://github.com/llvm/llvm-project/commit/305cf0e912529bfd847429f33ae09756aeb1813a.diff
LOG: [InstCombine] Make foldCmpLoadFromIndexedGlobal() GEP-type independent (#157089)
foldCmpLoadFromIndexedGlobal() currently checks that the global type,
the GEP type and the load type match in certain ways. Replace this with
generic logic based on offsets.
This is a reboot of https://github.com/llvm/llvm-project/pull/67093.
This PR is less ambitious: it requires that the constant offset is
smaller than the stride, which avoids the additional complexity of that
PR.
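For illustration, here is the kind of pattern that is now handled (a
minimal sketch in the spirit of the new load-cmp.ll tests below, with
hypothetical names; the exact fold InstCombine produces may differ): an
i16 load through an i16-typed GEP into a [4 x i32] constant global,
previously rejected because the types do not match, now foldable based
purely on the computed offsets.

@g = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]

define i1 @halfword_is_zero(i32 %idx) {
  ; Byte offset is 2 * %idx; neither the GEP source element type (i16)
  ; nor the load type matches the [4 x i32] element type of the global.
  %gep = getelementptr inbounds i16, ptr @g, i32 %idx
  %val = load i16, ptr %gep
  %cmp = icmp eq i16 %val, 0
  ret i1 %cmp
}

Running this through opt -passes=instcombine -S should now reduce the
compare to a shift and mask test against a constant bitmask, as the new
CHECK lines below show.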
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/load-cmp.ll
llvm/test/Transforms/InstCombine/opaque-ptr.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 90feddf6dcfe1..01b0da3469c18 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -112,73 +112,42 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
- if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- !GV->getValueType()->isArrayTy() || !GV->isConstant() ||
- !GV->hasDefinitiveInitializer())
- return nullptr;
-
- Type *GEPSrcEltTy = GEP->getSourceElementType();
- if (GEPSrcEltTy->isArrayTy())
- GEPSrcEltTy = GEPSrcEltTy->getArrayElementType();
- if (GV->getValueType()->getArrayElementType() != GEPSrcEltTy)
+ if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
- if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
+ TypeSize GlobalSize = DL.getTypeAllocSize(Init->getType());
+ Type *EltTy = LI->getType();
+ TypeSize EltSize = DL.getTypeStoreSize(EltTy);
+ if (EltSize.isScalable())
return nullptr;
- uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
- // Don't blow up on huge arrays.
- if (ArrayElementCount > MaxArraySizeForCombine)
+ unsigned IndexBW = DL.getIndexTypeSizeInBits(GEP->getType());
+ SmallMapVector<Value *, APInt, 4> VarOffsets;
+ APInt ConstOffset(IndexBW, 0);
+ if (!GEP->collectOffset(DL, IndexBW, VarOffsets, ConstOffset) ||
+ VarOffsets.size() != 1 || IndexBW > 64)
return nullptr;
- // There are many forms of this optimization we can handle, for now, just do
- // the simple index into a single-dimensional array or elements of equal size.
- //
- // Require: GEP [n x i8] GV, 0, Idx {{, constant indices}}
- // Or: GEP i8 GV, Idx
-
- unsigned GEPIdxOp = 1;
- if (GEP->getSourceElementType()->isArrayTy()) {
- GEPIdxOp = 2;
- if (!match(GEP->getOperand(1), m_ZeroInt()))
- return nullptr;
- }
- if (GEP->getNumOperands() < GEPIdxOp + 1 ||
- isa<Constant>(GEP->getOperand(GEPIdxOp)))
+ Value *Idx = VarOffsets.front().first;
+ const APInt &Stride = VarOffsets.front().second;
+ // If the index type is non-canonical, wait for it to be canonicalized.
+ if (Idx->getType()->getScalarSizeInBits() != IndexBW)
return nullptr;
- // Check that indices after the variable are constants and in-range for the
- // type they index. Collect the indices. This is typically for arrays of
- // structs.
- SmallVector<unsigned, 4> LaterIndices;
-
- Type *EltTy = Init->getType()->getArrayElementType();
- for (unsigned i = GEPIdxOp + 1, e = GEP->getNumOperands(); i != e; ++i) {
- ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!Idx)
- return nullptr; // Variable index.
-
- uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal)
- return nullptr; // Too large array index.
-
- if (StructType *STy = dyn_cast<StructType>(EltTy))
- EltTy = STy->getElementType(IdxVal);
- else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements())
- return nullptr;
- EltTy = ATy->getElementType();
- } else {
- return nullptr; // Unknown type.
- }
+ // Allow an additional constant offset, but only within the stride.
+ if (!ConstOffset.ult(Stride))
+ return nullptr;
- LaterIndices.push_back(IdxVal);
- }
+ // Don't handle overlapping loads for now.
+ if (!Stride.uge(EltSize.getFixedValue()))
+ return nullptr;
- Value *Idx = GEP->getOperand(GEPIdxOp);
- // If the index type is non-canonical, wait for it to be canonicalized.
- if (Idx->getType() != DL.getIndexType(GEP->getType()))
+ // Don't blow up on huge arrays.
+ uint64_t ArrayElementCount =
+ divideCeil((GlobalSize.getFixedValue() - ConstOffset.getZExtValue()),
+ Stride.getZExtValue());
+ if (ArrayElementCount > MaxArraySizeForCombine)
return nullptr;
enum { Overdefined = -3, Undefined = -2 };
@@ -211,18 +180,12 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Scan the array and see if one of our patterns matches.
Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
- for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
- Constant *Elt = Init->getAggregateElement(i);
+ APInt Offset = ConstOffset;
+ for (unsigned i = 0, e = ArrayElementCount; i != e; ++i, Offset += Stride) {
+ Constant *Elt = ConstantFoldLoadFromConst(Init, EltTy, Offset, DL);
if (!Elt)
return nullptr;
- // If this is indexing an array of structures, get the structure element.
- if (!LaterIndices.empty()) {
- Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
- if (!Elt)
- return nullptr;
- }
-
// If the element is masked, handle it.
if (AndCst) {
Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
@@ -309,19 +272,17 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.
- // If inbounds keyword is not present, Idx * ElementSize can overflow.
- // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+ // If inbounds keyword is not present, Idx * Stride can overflow.
+ // Let's assume that Stride is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
- unsigned ElementSize =
- DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value *Idx) {
- if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+ if (!GEP->isInBounds() && Stride.countr_zero() != 0) {
Value *Mask = Constant::getAllOnesValue(Idx->getType());
- Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
+ Mask = Builder.CreateLShr(Mask, Stride.countr_zero());
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 2a2c74dc09e2f..79faefbd5df56 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -371,3 +371,186 @@ define i1 @pr93017(i64 %idx) {
%cmp = icmp ne ptr %v, null
ret i1 %cmp
}
+
+@g_i32_lo = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+
+; Mask is 0b10101010
+define i1 @load_vs_array_type_mismatch1(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch1(
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 170
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds i16, ptr @g_i32_lo, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@g_i32_hi = internal constant [4 x i32] [i32 u0x00010000, i32 u0x00020000, i32 u0x00030000, i32 u0x00040000]
+
+; Mask is 0b01010101
+define i1 @load_vs_array_type_mismatch2(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch2(
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 85
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds i16, ptr @g_i32_hi, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@g_i16_1 = internal constant [8 x i16] [i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1, i16 0]
+
+; idx == 1 || idx == 3
+define i1 @load_vs_array_type_mismatch_offset1(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch_offset1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds {i16, i16}, ptr @g_i16_1, i32 %idx, i32 1
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@g_i16_2 = internal constant [8 x i16] [i16 1, i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1]
+
+; idx == 0 || idx == 2
+define i1 @load_vs_array_type_mismatch_offset2(i32 %idx) {
+; CHECK-LABEL: @load_vs_array_type_mismatch_offset2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[IDX:%.*]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr inbounds {i16, i16}, ptr @g_i16_2, i32 %idx, i32 1
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+define i1 @offset_larger_than_stride(i32 %idx) {
+; CHECK-LABEL: @offset_larger_than_stride(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr [2 x i16], ptr @g_i16_1, i32 1, i32 [[TMP1:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr [2 x i16], ptr @g_i16_1, i64 1, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+define i1 @load_size_larger_stride(i32 %idx) {
+; CHECK-LABEL: @load_size_larger_stride(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr @g_i16_1, i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %gep = getelementptr i8, ptr @g_i16_1, i32 %idx
+ %load = load i16, ptr %gep
+ %cmp = icmp eq i16 %load, 0
+ ret i1 %cmp
+}
+
+@CG_MESSY = constant [9 x i32] [i32 1, i32 7, i32 -1, i32 5, i32 4, i32 1, i32 1, i32 5, i32 4]
+
+define i1 @cmp_load_constant_array_messy(i32 %x){
+; CHECK-LABEL: @cmp_load_constant_array_messy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0:%.*]], 1073741823
+; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 373
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[COND]]
+;
+
+entry:
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp slt i32 %isOK, 5
+ ret i1 %cond
+}
+
+define i1 @cmp_diff_load_constant_array_messy0(i32 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1:%.*]], 1073741823
+; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i32 1, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 373
+; CHECK-NEXT: [[COND:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i32 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond = icmp slt i16 %isOK, 5
+ ret i1 %cond
+}
+
+; Load size larger than stride currently not supported.
+define i1 @cmp_diff_load_constant_array_messy1(i32 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1:%.*]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND]]
+;
+ %isOK_ptr = getelementptr i6, ptr @CG_MESSY, i32 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond = icmp slt i16 %isOK, 5
+ ret i1 %cond
+}
+
+define i1 @cmp_load_constant_array_variable_icmp(i32 %x, i32 %y) {
+; CHECK-LABEL: @cmp_load_constant_array_variable_icmp(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG_MESSY, i32 [[X:%.*]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[COND]]
+;
+entry:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG_MESSY, i32 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp ult i32 %isOK, %y
+ ret i1 %cond
+}
+
+@CG_CLEAR = constant [10 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9]
+
+; Offsets not supported if negative or larger than stride.
+define i1 @cmp_load_constant_additional_positive_offset(i32 %x) {
+; CHECK-LABEL: @cmp_load_constant_additional_positive_offset(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 [[X:%.*]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND]]
+;
+entry:
+ %isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 5, i32 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp ult i32 %isOK, 5
+ ret i1 %cond
+}
+
+define i1 @cmp_load_constant_additional_negative_offset(i32 %x) {
+; CHECK-LABEL: @cmp_load_constant_additional_negative_offset(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 [[X:%.*]], i32 -5
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND]]
+;
+entry:
+ %isOK_ptr = getelementptr inbounds [1 x i32], ptr @CG_CLEAR, i32 %x, i32 -5
+ %isOK = load i32, ptr %isOK_ptr
+ %cond = icmp ult i32 %isOK, 5
+ ret i1 %cond
+}
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 047698102a695..b54170ed3a2e8 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -543,10 +543,7 @@ define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr @ary, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep