[llvm] a848916 - [instcombine] Scalarize operands of vector geps if possible (#145402)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 24 09:48:03 PDT 2025


Author: Philip Reames
Date: 2025-06-24T09:48:00-07:00
New Revision: a84891698a2a13780d3804686ebf31d3639dd6a4

URL: https://github.com/llvm/llvm-project/commit/a84891698a2a13780d3804686ebf31d3639dd6a4
DIFF: https://github.com/llvm/llvm-project/commit/a84891698a2a13780d3804686ebf31d3639dd6a4.diff

LOG: [instcombine] Scalarize operands of vector geps if possible (#145402)

If we have a gep with vector indices which were splats (either constants
or shuffles), prefer the scalar form of the index. If all operands are
scalarizable, then prefer a scalar gep with splat following.

This does loose some information about undef/poison lanes, but I'm not
sure that's significant versus the number of downstream transformations
which get confused by having to manual scalarize operands.

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
    llvm/test/Transforms/InstCombine/getelementptr.ll
    llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
    llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
    llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
    llvm/test/Transforms/InstCombine/vector_gep1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index ce42029261359..99acb02561d53 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2993,10 +2993,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         return replaceInstUsesWith(GEP, V);
       return &GEP;
     }
-
-    // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
-    // possible (decide on canonical form for pointer broadcast), 3) exploit
-    // undef elements to decrease demanded bits
   }
 
   // Eliminate unneeded casts for indices, and replace indices which displace
@@ -3058,6 +3054,32 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     return replaceInstUsesWith(GEP, NewGEP);
   }
 
+  // Scalarize vector operands; prefer splat-of-gep.as canonical form.
+  // Note that this looses information about undef lanes; we run it after
+  // demanded bits to partially mitigate that loss.
+  if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
+        return Op->getType()->isVectorTy() && getSplatValue(Op);
+      })) {
+    SmallVector<Value *> NewOps;
+    for (auto &Op : GEP.operands()) {
+      if (Op->getType()->isVectorTy())
+        if (Value *Scalar = getSplatValue(Op)) {
+          NewOps.push_back(Scalar);
+          continue;
+        }
+      NewOps.push_back(Op);
+    }
+
+    Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
+                                   ArrayRef(NewOps).drop_front(), GEP.getName(),
+                                   GEP.getNoWrapFlags());
+    if (!Res->getType()->isVectorTy()) {
+      ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
+      Res = Builder.CreateVectorSplat(EC, Res);
+    }
+    return replaceInstUsesWith(GEP, Res);
+  }
+
   // Check to see if the inputs to the PHI node are getelementptr instructions.
   if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
     if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))

diff  --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
index 0bbb1035b1093..c5f9dc1861704 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -11,13 +11,13 @@ define <16 x ptr> @test(i1 %tobool) {
 ; CHECK-NEXT:    [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
 ; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
 ; CHECK:       [[F0]]:
-; CHECK-NEXT:    [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1)
+; CHECK-NEXT:    [[MM_VECTORGEP1:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 2
 ; CHECK-NEXT:    br label %[[MERGE:.*]]
 ; CHECK:       [[F1]]:
-; CHECK-NEXT:    [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 2)
+; CHECK-NEXT:    [[MM_VECTORGEP22:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 4
 ; CHECK-NEXT:    br label %[[MERGE]]
 ; CHECK:       [[MERGE]]:
-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP1]], %[[F0]] ], [ [[MM_VECTORGEP22]], %[[F1]] ]
 ; CHECK-NEXT:    ret <16 x ptr> [[VEC_PHI]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 7568c6edc429c..b7d263f5eb320 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -247,11 +247,11 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x ptr> %P) nounwind {
 
 define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: @test13_fixed_fixed(
-; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
-; CHECK-NEXT:    [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4)
-; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
+; CHECK-NEXT:    [[A1:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P:%.*]], i64 0, i64 [[X:%.*]]
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[A1]], i64 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x ptr> [[DOTSPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P]], <2 x i64> [[Y:%.*]]
+; CHECK-NEXT:    [[C:%.*]] = icmp eq <2 x ptr> [[DOTSPLAT]], [[B]]
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %A = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X

diff  --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
index a240dfe7d271a..cbfdebc6cb412 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
@@ -525,9 +525,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {
 
 define ptr @gep_splat_base_w_cv_idx(ptr %base) {
 ; CHECK-LABEL: @gep_splat_base_w_cv_idx(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
 ; CHECK-NEXT:    ret ptr [[EE]]
 ;
   %basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -539,9 +537,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {
 
 define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
 ; CHECK-LABEL: @gep_splat_base_w_vidx(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    ret ptr [[EE]]
 ;
   %basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -597,10 +594,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
 }
 define ptr @gep_splat_both(ptr %base, i64 %idx) {
 ; CHECK-LABEL: @gep_splat_both(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT:    [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    ret ptr [[EE]]
 ;
   %basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -641,9 +635,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
 ;; indices.
 define ptr @PR41624(<2 x ptr> %a) {
 ; CHECK-LABEL: @PR41624(
-; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
+; CHECK-NEXT:    [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
+; CHECK-NEXT:    ret ptr [[R1]]
 ;
   %w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
   %r = extractelement <2 x ptr> %w, i32 0
@@ -657,8 +651,8 @@ define ptr @PR41624(<2 x ptr> %a) {
 define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
 ; CHECK-LABEL: @zero_sized_type_extract(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
 ; CHECK-NEXT:    ret ptr [[T2]]
 ;
 bb:

diff  --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index ee7ef9955e643..f0ef33dcc6b4e 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -528,9 +528,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {
 
 define ptr @gep_splat_base_w_cv_idx(ptr %base) {
 ; CHECK-LABEL: @gep_splat_base_w_cv_idx(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
 ; CHECK-NEXT:    ret ptr [[EE]]
 ;
   %basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -542,9 +540,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {
 
 define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
 ; CHECK-LABEL: @gep_splat_base_w_vidx(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
 ; CHECK-NEXT:    ret ptr [[EE]]
 ;
   %basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -600,10 +597,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
 }
 define ptr @gep_splat_both(ptr %base, i64 %idx) {
 ; CHECK-LABEL: @gep_splat_both(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT:    [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    ret ptr [[EE]]
 ;
   %basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -644,9 +638,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
 ;; indices.
 define ptr @PR41624(<2 x ptr> %a) {
 ; CHECK-LABEL: @PR41624(
-; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
-; CHECK-NEXT:    ret ptr [[R]]
+; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
+; CHECK-NEXT:    [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
+; CHECK-NEXT:    ret ptr [[R1]]
 ;
   %w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
   %r = extractelement <2 x ptr> %w, i32 0
@@ -660,8 +654,8 @@ define ptr @PR41624(<2 x ptr> %a) {
 define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
 ; CHECK-LABEL: @zero_sized_type_extract(
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
-; CHECK-NEXT:    [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
+; CHECK-NEXT:    [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
 ; CHECK-NEXT:    ret ptr [[T2]]
 ;
 bb:

diff  --git a/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
index 994ecc2246bb6..b882d7dfff91e 100644
--- a/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
@@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {
 
 define <2 x ptr> @test7(<2 x ptr> %a) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
 ; CHECK-NEXT:    ret <2 x ptr> [[W]]
 ;
   %w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer

diff  --git a/llvm/test/Transforms/InstCombine/vector_gep1.ll b/llvm/test/Transforms/InstCombine/vector_gep1.ll
index ab99ee71fc8db..92de12213bee1 100644
--- a/llvm/test/Transforms/InstCombine/vector_gep1.ll
+++ b/llvm/test/Transforms/InstCombine/vector_gep1.ll
@@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {
 
 define <2 x ptr> @test7(<2 x ptr> %a) {
 ; CHECK-LABEL: @test7(
-; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
 ; CHECK-NEXT:    ret <2 x ptr> [[W]]
 ;
   %w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer


        


More information about the llvm-commits mailing list