[PATCH] D142208: [InstCombine] Check one use during GEP merging

Fri Jan 20 06:02:34 PST 2023

nikic created this revision.
nikic added reviewers: spatel, RKSimon, lebedev.ri.
Herald added subscribers: StephenFan, hiraditya.
Herald added a project: All.
nikic requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Currently we try to combine two GEPs even if the inner one has more than one use. However, this means we can end up duplicating the index arithmetic.

This patch restricts GEP merging to one-use or constant source. Constant offsets are allowed, because these are considered essentially free.

TBH I'm not really sure whether we should do this, as it's long-standing behavior, and I'm not aware of it causing active problems. This came up as a question during D138637 <https://reviews.llvm.org/D138637>.


https://reviews.llvm.org/D142208

Files:
  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
  llvm/test/Transforms/InstCombine/gepphigep.ll
  llvm/test/Transforms/InstCombine/getelementptr.ll
  llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll


Index: llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
===================================================================

--- llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
+++ llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
@@ -18,12 +18,12 @@
 ; O1-NEXT:  entry:
 ; O1-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], ptr [[FVP]], i64 0, i32 1
 ; O1-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; O1-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef, i32 1
+; O1-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
+; O1-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[ARRAYIDX_I_I]], i64 0, i32 1
 ; O1-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; O1-NEXT:    [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; O1-NEXT:    br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; O1:       for.body7.lr.ph.i:
-; O1-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP0]], i64 undef
 ; O1-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; O1-NEXT:    [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
 ; O1-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]]
@@ -48,12 +48,12 @@
 ; O23-NEXT:  entry:
 ; O23-NEXT:    [[VSRC23_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], ptr [[FVP]], i64 0, i32 1
 ; O23-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[VSRC23_I]], align 8, !tbaa [[TBAA0:![0-9]+]]
-; O23-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef, i32 1
+; O23-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], ptr [[TMP0]], i64 undef
+; O23-NEXT:    [[SIZE4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[ARRAYIDX_I_I]], i64 0, i32 1
 ; O23-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SIZE4_I]], align 8, !tbaa [[TBAA6:![0-9]+]]
 ; O23-NEXT:    [[CMP56_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; O23-NEXT:    br i1 [[CMP56_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; O23:       for.body7.lr.ph.i:
-; O23-NEXT:    [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], ptr [[TMP0]], i64 undef
 ; O23-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; O23-NEXT:    [[ARRAYIDX_I3_I:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 undef
 ; O23-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[FVP]], align 8, !tbaa [[TBAA0]]
Index: llvm/test/Transforms/InstCombine/getelementptr.ll
===================================================================
--- llvm/test/Transforms/InstCombine/getelementptr.ll
+++ llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -1285,7 +1285,7 @@
 ; CHECK-LABEL: @gep_of_gep_multiuse_var_and_const(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr { i32, i32 }, ptr [[P:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    call void @use(ptr [[GEP1]])
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr { i32, i32 }, ptr [[P]], i64 [[IDX]], i32 1
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr { i32, i32 }, ptr [[GEP1]], i64 0, i32 1
 ; CHECK-NEXT:    ret ptr [[GEP2]]
 ;
   %gep1 = getelementptr { i32, i32 }, ptr %p, i64 %idx
@@ -1298,7 +1298,7 @@
 ; CHECK-LABEL: @gep_of_gep_multiuse_var_and_var(
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr [4 x i32], ptr [[P:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    call void @use(ptr [[GEP1]])
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr [4 x i32], ptr [[P]], i64 [[IDX]], i64 [[IDX2:%.*]]
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr [4 x i32], ptr [[GEP1]], i64 0, i64 [[IDX2:%.*]]
 ; CHECK-NEXT:    ret ptr [[GEP2]]
 ;
   %gep1 = getelementptr [4 x i32], ptr %p, i64 %idx
Index: llvm/test/Transforms/InstCombine/gepphigep.ll
===================================================================
--- llvm/test/Transforms/InstCombine/gepphigep.ll
+++ llvm/test/Transforms/InstCombine/gepphigep.ll
@@ -54,7 +54,7 @@
 ; CHECK-NEXT:    store i32 0, ptr [[TMP10]], align 4
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP1]], i64 [[TMP19:%.*]]
 ; CHECK-NEXT:    store i32 0, ptr [[TMP20]], align 4
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP1]], i64 [[TMP9]], i32 1
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[STRUCT2]], ptr [[TMP10]], i64 0, i32 1
 ; CHECK-NEXT:    [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP25]]
 ;
Index: llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2031,11 +2031,15 @@
     if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
       return nullptr;   // Wait until our source is folded to completion.
 
+  // Only try to combine GEPs if the source has one use or is a constant GEP,
+  // otherwise we will duplicate index arithmetic.
+  if (!Src->hasOneUse() && !Src->hasAllConstantIndices())
+    return nullptr;
+
   // For constant GEPs, use a more general offset-based folding approach.
   // Only do this for opaque pointers, as the result element type may change.
   Type *PtrTy = Src->getType()->getScalarType();
-  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices() &&
-      (Src->hasOneUse() || Src->hasAllConstantIndices())) {
+  if (PtrTy->isOpaquePointerTy() && GEP.hasAllConstantIndices()) {
     // Split Src into a variable part and a constant suffix.
     gep_type_iterator GTI = gep_type_begin(*Src);
     Type *BaseType = GTI.getIndexedType();


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D142208.490802.patch
Type: text/x-patch
Size: 5996 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230120/7a5b6108/attachment.bin>