[llvm] [InstCombine] Fold icmp(constants[x]) when the range of x is given (PR #67093)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 29 05:04:30 PST 2023
https://github.com/XChy updated https://github.com/llvm/llvm-project/pull/67093
>From c5d4ac3625af729805cb9046d7b01e7009caea7a Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 31 Jul 2023 17:10:45 +0800
Subject: [PATCH 1/2] [InstCombine] Tests for simplifying icmp(constants[x])
---
llvm/test/Transforms/InstCombine/load-cmp.ll | 146 +++++++++++++++++++
1 file changed, 146 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index e941284a798ed1..672eef5754a0ed 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -334,3 +334,149 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
%r = icmp eq i32 %q, 9
ret i1 %r
}
+
+
+ at CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+
+define i1 @cmp_load_constant_array0(i64 %x){
+; CHECK-LABEL: @cmp_load_constant_array0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp ult i64 %x, 2
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp ult i32 %isOK, 3
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_load_constant_array1(i64 %x){
+; CHECK-LABEL: @cmp_load_constant_array1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[X:%.*]], 2
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp ult i64 %x, 2
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp ugt i32 %isOK, 10
+ ret i1 %cond_inferred
+}
+
+ at CG_MESSY = constant [9 x i32] [i32 1, i32 7, i32 -1, i32 5, i32 4, i32 1, i32 1, i32 5, i32 4]
+
+define i1 @cmp_load_constant_array_messy(i64 %x){
+; CHECK-LABEL: @cmp_load_constant_array_messy(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[X:%.*]], 6
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp slt i64 %x, 6
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp slt i32 %isOK, 5
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP1]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+ %isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond_inferred = icmp slt i16 %isOK, 5
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_diff_load_constant_array_messy1(i64 %x){
+; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
+ %isOK = load i16, ptr %isOK_ptr
+ %cond_inferred = icmp slt i16 %isOK, 5
+ ret i1 %cond_inferred
+}
+
+define i1 @cmp_load_constant_array_fail0(i64 %x, i32 %y) {
+; CHECK-LABEL: @cmp_load_constant_array_fail0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp ult i64 [[X:%.*]], 3
+; CHECK-NEXT: br i1 [[COND]], label [[CASE1:%.*]], label [[CASE2:%.*]]
+; CHECK: case2:
+; CHECK-NEXT: ret i1 false
+; CHECK: case1:
+; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
+; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
+; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], [[Y:%.*]]
+; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+;
+entry:
+ %cond = icmp ult i64 %x, 3
+ br i1 %cond, label %case1, label %case2
+
+case2:
+ ret i1 0
+
+case1:
+ %isOK_ptr = getelementptr inbounds i32, ptr @CG, i64 %x
+ %isOK = load i32, ptr %isOK_ptr
+ %cond_inferred = icmp ult i32 %isOK, %y
+ ret i1 %cond_inferred
+}
+
>From 992f410b712cd6e3d64edcf5fe40395a611de58f Mon Sep 17 00:00:00 2001
From: XChy <xxs_chy at outlook.com>
Date: Mon, 31 Jul 2023 17:17:07 +0800
Subject: [PATCH 2/2] [InstCombine] Fold icmp(constants[x]) when the range of x
is given
---
.../InstCombine/InstCombineCompares.cpp | 157 +++++++++---------
llvm/test/Transforms/InstCombine/load-cmp.ll | 49 ++----
.../test/Transforms/InstCombine/opaque-ptr.ll | 10 +-
3 files changed, 99 insertions(+), 117 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 818099e7e12cdf..ca6868876a15bd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -110,57 +110,35 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
LoadInst *LI, GetElementPtrInst *GEP, GlobalVariable *GV, CmpInst &ICI,
ConstantInt *AndCst) {
- if (LI->isVolatile() || LI->getType() != GEP->getResultElementType() ||
- GV->getValueType() != GEP->getSourceElementType() || !GV->isConstant() ||
- !GV->hasDefinitiveInitializer())
+ if (LI->isVolatile() || !GV->isConstant() || !GV->hasDefinitiveInitializer())
return nullptr;
Constant *Init = GV->getInitializer();
- if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
- return nullptr;
+ uint64_t DataSize = DL.getTypeAllocSize(Init->getType());
- uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
// Don't blow up on huge arrays.
- if (ArrayElementCount > MaxArraySizeForCombine)
+ uint64_t MaxDataSizeForCombine = MaxArraySizeForCombine * 8;
+ if (DataSize > MaxDataSizeForCombine)
return nullptr;
- // There are many forms of this optimization we can handle, for now, just do
- // the simple index into a single-dimensional array.
- //
- // Require: GEP GV, 0, i {{, constant indices}}
- if (GEP->getNumOperands() < 3 || !isa<ConstantInt>(GEP->getOperand(1)) ||
- !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
- isa<Constant>(GEP->getOperand(2)))
- return nullptr;
+ Type *LoadedTy = LI->getType();
+ uint64_t IndexSize = DL.getIndexTypeSizeInBits(GEP->getType());
+ Type *PtrIdxTy = DL.getIndexType(GEP->getType());
- // Check that indices after the variable are constants and in-range for the
- // type they index. Collect the indices. This is typically for arrays of
- // structs.
- SmallVector<unsigned, 4> LaterIndices;
-
- Type *EltTy = Init->getType()->getArrayElementType();
- for (unsigned i = 3, e = GEP->getNumOperands(); i != e; ++i) {
- ConstantInt *Idx = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!Idx)
- return nullptr; // Variable index.
-
- uint64_t IdxVal = Idx->getZExtValue();
- if ((unsigned)IdxVal != IdxVal)
- return nullptr; // Too large array index.
-
- if (StructType *STy = dyn_cast<StructType>(EltTy))
- EltTy = STy->getElementType(IdxVal);
- else if (ArrayType *ATy = dyn_cast<ArrayType>(EltTy)) {
- if (IdxVal >= ATy->getNumElements())
- return nullptr;
- EltTy = ATy->getElementType();
- } else {
- return nullptr; // Unknown type.
- }
+ MapVector<Value *, APInt> VariableOffsets;
+ APInt ConstantOffset(IndexSize, 0);
- LaterIndices.push_back(IdxVal);
- }
+ if (!GEP->collectOffset(getDataLayout(), IndexSize, VariableOffsets,
+ ConstantOffset))
+ return nullptr;
+ // Restrict to one variable currently.
+ if (VariableOffsets.size() != 1)
+ return nullptr;
+
+ // There are many forms of this optimization we can handle.
+ // Possible TODO: Fold: cmp(A[ax + by + ... + C], Rhs) <=> cmp(ax + by + ...
+ // + C, IndexRhs)
enum { Overdefined = -3, Undefined = -2 };
// Variables for our state machines.
@@ -189,20 +167,34 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// the array, this will fully represent all the comparison results.
uint64_t MagicBitvector = 0;
+ Value *Idx = nullptr;
+
// Scan the array and see if one of our patterns matches.
- Constant *CompareRHS = cast<Constant>(ICI.getOperand(1));
- for (unsigned i = 0, e = ArrayElementCount; i != e; ++i) {
- Constant *Elt = Init->getAggregateElement(i);
+ Constant *ComparedRHS = cast<Constant>(ICI.getOperand(1));
+ // The longest step we can reach once.
+ APInt OffsetStep = VariableOffsets.front().second;
+
+ // BeginOffset is the offset from constant pointer where we begin scanning the
+ // constant. Make BeginOffset the smallest non-negative offset.
+ APInt BeginOffset = ConstantOffset.srem(OffsetStep);
+ if (BeginOffset.slt(0))
+ BeginOffset += OffsetStep;
+
+ uint64_t ElementCountToTraverse =
+ (DataSize - BeginOffset).udiv(OffsetStep).getZExtValue() + 1;
+
+ // Don't traverse too many times.
+ if (ElementCountToTraverse > MaxArraySizeForCombine)
+ return nullptr;
+
+ APInt CurOffset = BeginOffset;
+ for (uint64_t i = 0; i < ElementCountToTraverse;
+ ++i, CurOffset += OffsetStep) {
+ Constant *Elt = ConstantFoldLoadFromConstPtr(GV, LoadedTy, CurOffset, DL);
+
if (!Elt)
return nullptr;
- // If this is indexing an array of structures, get the structure element.
- if (!LaterIndices.empty()) {
- Elt = ConstantFoldExtractValueInstruction(Elt, LaterIndices);
- if (!Elt)
- return nullptr;
- }
-
// If the element is masked, handle it.
if (AndCst) {
Elt = ConstantFoldBinaryOpOperands(Instruction::And, Elt, AndCst, DL);
@@ -212,7 +204,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, DL, &TLI);
+ ComparedRHS, DL, &TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -285,40 +277,47 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Now that we've scanned the entire array, emit our new comparison(s). We
// order the state machines in complexity of the generated code.
- Value *Idx = GEP->getOperand(2);
-
- // If the index is larger than the pointer offset size of the target, truncate
- // the index down like the GEP would do implicitly. We don't have to do this
- // for an inbounds GEP because the index can't be out of range.
- if (!GEP->isInBounds()) {
- Type *PtrIdxTy = DL.getIndexType(GEP->getType());
- unsigned OffsetSize = PtrIdxTy->getIntegerBitWidth();
- if (Idx->getType()->getPrimitiveSizeInBits().getFixedValue() > OffsetSize)
- Idx = Builder.CreateTrunc(Idx, PtrIdxTy);
- }
- // If inbounds keyword is not present, Idx * ElementSize can overflow.
- // Let's assume that ElementSize is 2 and the wanted value is at offset 0.
+ // If inbounds keyword is not present, Idx * LongestStep can overflow.
+ // Let's assume that LongestStep is 2 and the wanted value is at offset 0.
// Then, there are two possible values for Idx to match offset 0:
// 0x00..00, 0x80..00.
// Emitting 'icmp eq Idx, 0' isn't correct in this case because the
// comparison is false if Idx was 0x80..00.
// We need to erase the highest countTrailingZeros(ElementSize) bits of Idx.
- unsigned ElementSize =
- DL.getTypeAllocSize(Init->getType()->getArrayElementType());
auto MaskIdx = [&](Value *Idx) {
- if (!GEP->isInBounds() && llvm::countr_zero(ElementSize) != 0) {
+ if (!GEP->isInBounds() && OffsetStep.countr_zero() != 0) {
Value *Mask = ConstantInt::get(Idx->getType(), -1);
- Mask = Builder.CreateLShr(Mask, llvm::countr_zero(ElementSize));
+ Mask = Builder.CreateLShr(Mask, OffsetStep.countr_zero());
Idx = Builder.CreateAnd(Idx, Mask);
}
return Idx;
};
+ // Build the index expression lazily.
+ auto LazyGetIndex = [&](Value *CurIdx) -> Value * {
+ if (CurIdx)
+ return CurIdx;
+
+ // Initial bias for index. For example, when we fold cmp(GV[x + 3], C) into
+ // idx < 3, we actually get x + 3 < 3
+ Value *Bias = ConstantInt::get(
+ PtrIdxTy, (ConstantOffset - BeginOffset).sdiv(OffsetStep));
+ uint64_t IdxBitWidth = PtrIdxTy->getScalarSizeInBits();
+
+ auto [Var, Coefficient] = VariableOffsets.front();
+ uint64_t VarBitWidth = Var->getType()->getScalarSizeInBits();
+ assert("GEP indices do not get canonicalized to the index type" &&
+ VarBitWidth == IdxBitWidth);
+ Value *Idx = Builder.CreateAdd(Bias, Var);
+
+ return MaskIdx(Idx);
+ };
+
// If the comparison is only true for one or two elements, emit direct
// comparisons.
if (SecondTrueElement != Overdefined) {
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// None true -> false.
if (FirstTrueElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getFalse());
@@ -339,7 +338,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// If the comparison is only false for one or two elements, emit direct
// comparisons.
if (SecondFalseElement != Overdefined) {
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// None false -> true.
if (FirstFalseElement == Undefined)
return replaceInstUsesWith(ICI, Builder.getTrue());
@@ -362,7 +361,7 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// where it is true, emit the range check.
if (TrueRangeEnd != Overdefined) {
assert(TrueRangeEnd != FirstTrueElement && "Should emit single compare");
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// Generate (i-FirstTrue) <u (TrueRangeEnd-FirstTrue+1).
if (FirstTrueElement) {
@@ -371,14 +370,14 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
}
Value *End =
- ConstantInt::get(Idx->getType(), TrueRangeEnd - FirstTrueElement + 1);
+ ConstantInt::get(PtrIdxTy, TrueRangeEnd - FirstTrueElement + 1);
return new ICmpInst(ICmpInst::ICMP_ULT, Idx, End);
}
// False range check.
if (FalseRangeEnd != Overdefined) {
assert(FalseRangeEnd != FirstFalseElement && "Should emit single compare");
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
// Generate (i-FirstFalse) >u (FalseRangeEnd-FirstFalse).
if (FirstFalseElement) {
Value *Offs = ConstantInt::get(Idx->getType(), -FirstFalseElement);
@@ -399,13 +398,15 @@ Instruction *InstCombinerImpl::foldCmpLoadFromIndexedGlobal(
// Look for an appropriate type:
// - The type of Idx if the magic fits
// - The smallest fitting legal type
- if (ArrayElementCount <= Idx->getType()->getIntegerBitWidth())
- Ty = Idx->getType();
+
+ if (ElementCountToTraverse <= PtrIdxTy->getIntegerBitWidth())
+ Ty = PtrIdxTy;
else
- Ty = DL.getSmallestLegalIntType(Init->getContext(), ArrayElementCount);
+ Ty = DL.getSmallestLegalIntType(Init->getContext(),
+ ElementCountToTraverse);
if (Ty) {
- Idx = MaskIdx(Idx);
+ Idx = LazyGetIndex(Idx);
Value *V = Builder.CreateIntCast(Idx, Ty, false);
V = Builder.CreateLShr(ConstantInt::get(Ty, MagicBitvector), V);
V = Builder.CreateAnd(ConstantInt::get(Ty, 1), V);
diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll
index 672eef5754a0ed..80744a6b0ff2d8 100644
--- a/llvm/test/Transforms/InstCombine/load-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/load-cmp.ll
@@ -215,10 +215,7 @@ define i1 @test10_struct(i32 %x) {
define i1 @test10_struct_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_noinbounds(
-; CHECK-NEXT: [[P:%.*]] = getelementptr [[FOO:%.*]], ptr @GS, i32 [[X:%.*]], i32 0
-; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 9
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%p = getelementptr %Foo, ptr @GS, i32 %x, i32 0
%q = load i32, ptr %p
@@ -252,11 +249,7 @@ define i1 @test10_struct_i64(i64 %x){
define i1 @test10_struct_noinbounds_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_noinbounds_i16(
-; CHECK-NEXT: [[TMP1:%.*]] = sext i16 [[X:%.*]] to i32
-; CHECK-NEXT: [[P:%.*]] = getelementptr [[FOO:%.*]], ptr @GS, i32 [[TMP1]], i32 0
-; CHECK-NEXT: [[Q:%.*]] = load i32, ptr [[P]], align 4
-; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 0
-; CHECK-NEXT: ret i1 [[R]]
+; CHECK-NEXT: ret i1 false
;
%p = getelementptr %Foo, ptr @GS, i16 %x, i32 0
%q = load i32, ptr %p
@@ -338,6 +331,7 @@ define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
@CG = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
+; TODO: Fold it globally.
define i1 @cmp_load_constant_array0(i64 %x){
; CHECK-LABEL: @cmp_load_constant_array0(
; CHECK-NEXT: entry:
@@ -346,11 +340,7 @@ define i1 @cmp_load_constant_array0(i64 %x){
; CHECK: case2:
; CHECK-NEXT: ret i1 false
; CHECK: case1:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ult i32 [[ISOK]], 3
-; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+; CHECK-NEXT: ret i1 true
;
entry:
%cond = icmp ult i64 %x, 2
@@ -374,11 +364,7 @@ define i1 @cmp_load_constant_array1(i64 %x){
; CHECK: case2:
; CHECK-NEXT: ret i1 false
; CHECK: case1:
-; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr inbounds i32, ptr @CG, i32 [[TMP0]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ugt i32 [[ISOK]], 10
-; CHECK-NEXT: ret i1 [[COND_INFERRED]]
+; CHECK-NEXT: ret i1 false
;
entry:
%cond = icmp ult i64 %x, 2
@@ -405,9 +391,10 @@ define i1 @cmp_load_constant_array_messy(i64 %x){
; CHECK-NEXT: ret i1 false
; CHECK: case1:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP0]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i32, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i32 [[ISOK]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1073741823
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 373, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
entry:
@@ -427,9 +414,10 @@ case1:
define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
; CHECK-LABEL: @cmp_diff_load_constant_array_messy0(
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i32, ptr @CG_MESSY, i32 [[TMP1]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 4
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1073741823
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 373, [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 1
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i32 [[TMP4]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
%isOK_ptr = getelementptr i32, ptr @CG_MESSY, i64 %x
@@ -440,13 +428,13 @@ define i1 @cmp_diff_load_constant_array_messy0(i64 %x){
define i1 @cmp_diff_load_constant_array_messy1(i64 %x){
; CHECK-LABEL: @cmp_diff_load_constant_array_messy1(
-; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[X:%.*]] to i32
-; CHECK-NEXT: [[ISOK_PTR:%.*]] = getelementptr i6, ptr @CG_MESSY, i32 [[TMP1]]
-; CHECK-NEXT: [[ISOK:%.*]] = load i16, ptr [[ISOK_PTR]], align 2
-; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp slt i16 [[ISOK]], 5
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[X:%.*]], 4294967295
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 66160388071, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 1
+; CHECK-NEXT: [[COND_INFERRED:%.*]] = icmp ne i64 [[TMP3]], 0
; CHECK-NEXT: ret i1 [[COND_INFERRED]]
;
-%isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
+ %isOK_ptr = getelementptr i6, ptr @CG_MESSY, i64 %x
%isOK = load i16, ptr %isOK_ptr
%cond_inferred = icmp slt i16 %isOK, 5
ret i1 %cond_inferred
@@ -479,4 +467,3 @@ case1:
%cond_inferred = icmp ult i32 %isOK, %y
ret i1 %cond_inferred
}
-
diff --git a/llvm/test/Transforms/InstCombine/opaque-ptr.ll b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
index 4448a49ad92bf5..3ae794f10f3e18 100644
--- a/llvm/test/Transforms/InstCombine/opaque-ptr.ll
+++ b/llvm/test/Transforms/InstCombine/opaque-ptr.ll
@@ -487,10 +487,7 @@ define i1 @cmp_load_gep_global(i64 %idx) {
define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_load_type(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i8], ptr @ary, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i8], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep
@@ -500,10 +497,7 @@ define i1 @cmp_load_gep_global_different_load_type(i64 %idx) {
define i1 @cmp_load_gep_global_different_gep_type(i64 %idx) {
; CHECK-LABEL: @cmp_load_gep_global_different_gep_type(
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr [4 x i16], ptr @ary, i64 0, i64 [[IDX:%.*]]
-; CHECK-NEXT: [[LOAD:%.*]] = load i16, ptr [[GEP]], align 2
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[LOAD]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: ret i1 false
;
%gep = getelementptr [4 x i16], ptr @ary, i64 0, i64 %idx
%load = load i16, ptr %gep
More information about the llvm-commits
mailing list