[llvm] [InstCombine] Don't require GEP in indexed compare fold (PR #81614)

Mon Apr 28 09:40:18 PDT 2025

https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/81614

>From 44220ba6a3a6184eb8a34983b898221b849abc68 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 28 Nov 2023 11:21:01 +0100
Subject: [PATCH] [InstCombine] Don't require GEP in indexed compare fold

The indexed compare fold folds comparisons like p+a == p+b to
a == b, even in cases where the a/b are complex (e.g. via multiple
geps, or phis).

Currently, it requires that the LHS is actually a GEP, but this
requirement isn't really necessary: We can handle the pattern
p == p+b as well.

This patch removes the GEP requirement, allowing additional
comparisons to be optimized away.
---
 .../InstCombine/InstCombineCompares.cpp       | 39 ++++++++++---------
 .../Transforms/InstCombine/getelementptr.ll   |  8 ++--
 llvm/test/Transforms/InstCombine/pr39908.ll   | 12 ++----
 .../PhaseOrdering/loop-access-checks.ll       | 11 +++---
 4 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b7b0bb7361359..0b144d779ce81 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -626,32 +626,34 @@ static Value *rewriteGEPAsOffset(Value *Start, Value *Base, GEPNoWrapFlags NW,
   return NewInsts[Start];
 }
 
-/// Converts (CMP GEPLHS, RHS) if this change would make RHS a constant.
+/// Converts (CMP LHS, RHS) if this change would make RHS a constant.
 /// We can look through PHIs, GEPs and casts in order to determine a common base
-/// between GEPLHS and RHS.
-static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
-                                              CmpPredicate Cond,
+/// between LHS and RHS.
+static Instruction *transformToIndexedCompare(Value *LHS, Value *RHS,
+                                              ICmpInst::Predicate Cond,
                                               const DataLayout &DL,
                                               InstCombiner &IC) {
-  // FIXME: Support vector of pointers.
-  if (GEPLHS->getType()->isVectorTy())
+  if (ICmpInst::isSigned(Cond))
     return nullptr;
 
-  if (!GEPLHS->hasAllConstantIndices())
+  // FIXME: Support vector of pointers.
+  if (!LHS->getType()->isPointerTy())
     return nullptr;
 
-  APInt Offset(DL.getIndexTypeSizeInBits(GEPLHS->getType()), 0);
+  APInt Offset(DL.getIndexTypeSizeInBits(LHS->getType()), 0);
   Value *PtrBase =
-      GEPLHS->stripAndAccumulateConstantOffsets(DL, Offset,
-                                                /*AllowNonInbounds*/ false);
+      LHS->stripAndAccumulateConstantOffsets(DL, Offset,
+                                             /*AllowNonInbounds*/ false);
 
   // Bail if we looked through addrspacecast.
-  if (PtrBase->getType() != GEPLHS->getType())
+  if (PtrBase->getType() != LHS->getType())
     return nullptr;
 
   // The set of nodes that will take part in this transformation.
   SetVector<Value *> Nodes;
-  GEPNoWrapFlags NW = GEPLHS->getNoWrapFlags();
+  GEPNoWrapFlags NW = GEPNoWrapFlags::all();
+  if (auto *GEPLHS = dyn_cast<GEPOperator>(LHS))
+    NW = GEPLHS->getNoWrapFlags();
   if (!canRewriteGEPAsOffset(RHS, PtrBase, NW, DL, Nodes))
     return nullptr;
 
@@ -800,10 +802,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
         return replaceInstUsesWith(I, Cmp);
       }
 
-      // Otherwise, the base pointers are different and the indices are
-      // different. Try convert this to an indexed compare by looking through
-      // PHIs/casts.
-      return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
+      return nullptr;
     }
 
     if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands() &&
@@ -851,9 +850,7 @@ Instruction *InstCombinerImpl::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
     }
   }
 
-  // Try convert this to an indexed compare by looking through PHIs/casts as a
-  // last resort.
-  return transformToIndexedCompare(GEPLHS, RHS, Cond, DL, *this);
+  return nullptr;
 }
 
 bool InstCombinerImpl::foldAllocaCmp(AllocaInst *Alloca) {
@@ -7312,6 +7309,10 @@ Instruction *InstCombinerImpl::foldICmpCommutative(CmpPredicate Pred,
     if (Instruction *NI = foldGEPICmp(GEP, Op1, Pred, CxtI))
       return NI;
 
+  if (Instruction *Res =
+          transformToIndexedCompare(Op0, Op1, Pred, getDataLayout(), *this))
+    return Res;
+
   if (auto *SI = dyn_cast<SelectInst>(Op0))
     if (Instruction *NI = foldSelectICmp(Pred, SI, Op1, CxtI))
       return NI;
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index feba952919b9a..7e7b23929e740 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -686,15 +686,15 @@ define i32 @test28() nounwind  {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[ORIENTATIONS:%.*]] = alloca [1 x [1 x %struct.x]], align 8
 ; CHECK-NEXT:    [[T3:%.*]] = call i32 @puts(ptr noundef nonnull dereferenceable(1) @.str) #[[ATTR0]]
-; CHECK-NEXT:    [[T45:%.*]] = getelementptr inbounds nuw i8, ptr [[ORIENTATIONS]], i64 1
 ; CHECK-NEXT:    br label [[BB10:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVAR_NEXT:%.*]], [[BB10]] ]
 ; CHECK-NEXT:    [[T12_REC:%.*]] = xor i32 [[INDVAR]], -1
 ; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[T12_REC]] to i64
-; CHECK-NEXT:    [[T12:%.*]] = getelementptr inbounds [[STRUCT_X:%.*]], ptr [[T45]], i64 [[TMP0]]
-; CHECK-NEXT:    [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12]]) #[[ATTR0]]
-; CHECK-NEXT:    [[T84:%.*]] = icmp eq ptr [[T12]], [[ORIENTATIONS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[ORIENTATIONS]], i64 [[TMP0]]
+; CHECK-NEXT:    [[T12_PTR:%.*]] = getelementptr i8, ptr [[TMP1]], i64 1
+; CHECK-NEXT:    [[T16:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(1) @.str1, ptr nonnull [[T12_PTR]]) #[[ATTR0]]
+; CHECK-NEXT:    [[T84:%.*]] = icmp eq i32 [[INDVAR]], 0
 ; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
 ; CHECK-NEXT:    br i1 [[T84]], label [[BB17:%.*]], label [[BB10]]
 ; CHECK:       bb17:
diff --git a/llvm/test/Transforms/InstCombine/pr39908.ll b/llvm/test/Transforms/InstCombine/pr39908.ll
index ca143f417fb27..5d13a331c6d2e 100644
--- a/llvm/test/Transforms/InstCombine/pr39908.ll
+++ b/llvm/test/Transforms/InstCombine/pr39908.ll
@@ -7,9 +7,7 @@ target datalayout = "p:32:32"
 
 define i1 @test(ptr %p, i32 %n) {
 ; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[N:%.*]], i32 0, i32 0
-; CHECK-NEXT:    [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %end = getelementptr inbounds [0 x %S], ptr %p, i32 0, i32 %n, i32 0, i32 0
@@ -22,9 +20,7 @@ define i1 @test(ptr %p, i32 %n) {
 define i1 @test64(ptr %p, i64 %n) {
 ; CHECK-LABEL: @test64(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0
-; CHECK-NEXT:    [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 0
@@ -37,9 +33,7 @@ define i1 @test64(ptr %p, i64 %n) {
 define i1 @test64_overflow(ptr %p, i64 %n) {
 ; CHECK-LABEL: @test64_overflow(
 ; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[N:%.*]] to i32
-; CHECK-NEXT:    [[END:%.*]] = getelementptr inbounds [0 x %S], ptr [[P:%.*]], i32 0, i32 [[TMP1]], i32 0, i32 0
-; CHECK-NEXT:    [[LAST:%.*]] = getelementptr inbounds i8, ptr [[END]], i32 -8
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq ptr [[LAST]], [[P]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP1]], 1
 ; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %end = getelementptr inbounds [0 x %S], ptr %p, i64 0, i64 %n, i32 0, i64 8589934592
diff --git a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
index e53b8687af915..9a88f64730683 100644
--- a/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
+++ b/llvm/test/Transforms/PhaseOrdering/loop-access-checks.ll
@@ -24,7 +24,7 @@ define void @test_fill_with_foreach([2 x i64] %elems.coerce) {
 ; CHECK-NEXT:    [[ELEMS_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = inttoptr i64 [[ELEMS_COERCE_FCA_0_EXTRACT]] to ptr
 ; CHECK-NEXT:    [[ELEMS_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i64] [[ELEMS_COERCE]], 1
-; CHECK-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i64 [[ELEMS_COERCE_FCA_1_EXTRACT]]
+; CHECK-NEXT:    [[ADD_PTR_I_IDX:%.*]] = shl nsw i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 2
 ; CHECK-NEXT:    [[CMP_NOT_I_I_I_I:%.*]] = icmp slt i64 [[ELEMS_COERCE_FCA_1_EXTRACT]], 0
 ; CHECK-NEXT:    br i1 [[CMP_NOT_I_I_I_I]], label [[ERROR:%.*]], label [[FOR_COND_PREHEADER_SPLIT:%.*]]
 ; CHECK:       for.cond.preheader.split:
@@ -36,10 +36,11 @@ define void @test_fill_with_foreach([2 x i64] %elems.coerce) {
 ; CHECK-NEXT:    tail call void @error()
 ; CHECK-NEXT:    br label [[COMMON_RET]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[__BEGIN1_SROA_0_03:%.*]] = phi ptr [ [[INCDEC_PTR_I:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_COND_PREHEADER_SPLIT]] ]
-; CHECK-NEXT:    tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_03]])
-; CHECK-NEXT:    [[INCDEC_PTR_I]] = getelementptr inbounds nuw i8, ptr [[__BEGIN1_SROA_0_03]], i64 4
-; CHECK-NEXT:    [[CMP_I_NOT:%.*]] = icmp eq ptr [[INCDEC_PTR_I]], [[ADD_PTR_I]]
+; CHECK-NEXT:    [[__BEGIN1_SROA_0_0_IDX3:%.*]] = phi i64 [ [[__BEGIN1_SROA_0_0_ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_COND_PREHEADER_SPLIT]] ]
+; CHECK-NEXT:    [[__BEGIN1_SROA_0_0_PTR4:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 [[__BEGIN1_SROA_0_0_IDX3]]
+; CHECK-NEXT:    tail call void @use(ptr noundef nonnull align 4 dereferenceable(4) [[__BEGIN1_SROA_0_0_PTR4]])
+; CHECK-NEXT:    [[__BEGIN1_SROA_0_0_ADD]] = add nuw nsw i64 [[__BEGIN1_SROA_0_0_IDX3]], 4
+; CHECK-NEXT:    [[CMP_I_NOT:%.*]] = icmp eq i64 [[__BEGIN1_SROA_0_0_ADD]], [[ADD_PTR_I_IDX]]
 ; CHECK-NEXT:    br i1 [[CMP_I_NOT]], label [[COMMON_RET]], label [[FOR_BODY]]
 ;
 entry: