[llvm] a848916 - [instcombine] Scalarize operands of vector geps if possible (#145402)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 24 09:48:03 PDT 2025
Author: Philip Reames
Date: 2025-06-24T09:48:00-07:00
New Revision: a84891698a2a13780d3804686ebf31d3639dd6a4
URL: https://github.com/llvm/llvm-project/commit/a84891698a2a13780d3804686ebf31d3639dd6a4
DIFF: https://github.com/llvm/llvm-project/commit/a84891698a2a13780d3804686ebf31d3639dd6a4.diff
LOG: [instcombine] Scalarize operands of vector geps if possible (#145402)
If we have a gep with vector indices which were splats (either constants
or shuffles), prefer the scalar form of the index. If all operands are
scalarizable, then prefer a scalar gep with splat following.
This does lose some information about undef/poison lanes, but I'm not
sure that's significant versus the number of downstream transformations
which get confused by having to manually scalarize operands.
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
llvm/test/Transforms/InstCombine/getelementptr.ll
llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
llvm/test/Transforms/InstCombine/vector_gep1.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index ce42029261359..99acb02561d53 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2993,10 +2993,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return replaceInstUsesWith(GEP, V);
return &GEP;
}
-
- // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
- // possible (decide on canonical form for pointer broadcast), 3) exploit
- // undef elements to decrease demanded bits
}
// Eliminate unneeded casts for indices, and replace indices which displace
@@ -3058,6 +3054,32 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
return replaceInstUsesWith(GEP, NewGEP);
}
+ // Scalarize vector operands; prefer splat-of-gep as canonical form.
+ // Note that this loses information about undef lanes; we run it after
+ // demanded bits to partially mitigate that loss.
+ if (GEPType->isVectorTy() && llvm::any_of(GEP.operands(), [](Value *Op) {
+ return Op->getType()->isVectorTy() && getSplatValue(Op);
+ })) {
+ SmallVector<Value *> NewOps;
+ for (auto &Op : GEP.operands()) {
+ if (Op->getType()->isVectorTy())
+ if (Value *Scalar = getSplatValue(Op)) {
+ NewOps.push_back(Scalar);
+ continue;
+ }
+ NewOps.push_back(Op);
+ }
+
+ Value *Res = Builder.CreateGEP(GEP.getSourceElementType(), NewOps[0],
+ ArrayRef(NewOps).drop_front(), GEP.getName(),
+ GEP.getNoWrapFlags());
+ if (!Res->getType()->isVectorTy()) {
+ ElementCount EC = cast<VectorType>(GEPType)->getElementCount();
+ Res = Builder.CreateVectorSplat(EC, Res);
+ }
+ return replaceInstUsesWith(GEP, Res);
+ }
+
// Check to see if the inputs to the PHI node are getelementptr instructions.
if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
if (Value *NewPtrOp = foldGEPOfPhi(GEP, PN, Builder))
diff --git a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
index 0bbb1035b1093..c5f9dc1861704 100644
--- a/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
+++ b/llvm/test/Transforms/InstCombine/fold-phi-arg-gep-to-phi-negative.ll
@@ -11,13 +11,13 @@ define <16 x ptr> @test(i1 %tobool) {
; CHECK-NEXT: [[LANE_15:%.*]] = insertelement <16 x ptr> poison, ptr [[LANE_0]], i64 0
; CHECK-NEXT: br i1 [[TOBOOL]], label %[[F1:.*]], label %[[F0:.*]]
; CHECK: [[F0]]:
-; CHECK-NEXT: [[MM_VECTORGEP:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 1)
+; CHECK-NEXT: [[MM_VECTORGEP1:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 2
; CHECK-NEXT: br label %[[MERGE:.*]]
; CHECK: [[F1]]:
-; CHECK-NEXT: [[MM_VECTORGEP2:%.*]] = getelementptr inbounds [[FOO]], <16 x ptr> [[LANE_15]], <16 x i64> zeroinitializer, <16 x i32> splat (i32 2)
+; CHECK-NEXT: [[MM_VECTORGEP22:%.*]] = getelementptr inbounds i8, <16 x ptr> [[LANE_15]], i64 4
; CHECK-NEXT: br label %[[MERGE]]
; CHECK: [[MERGE]]:
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP]], %[[F0]] ], [ [[MM_VECTORGEP2]], %[[F1]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x ptr> [ [[MM_VECTORGEP1]], %[[F0]] ], [ [[MM_VECTORGEP22]], %[[F1]] ]
; CHECK-NEXT: ret <16 x ptr> [[VEC_PHI]]
;
entry:
diff --git a/llvm/test/Transforms/InstCombine/getelementptr.ll b/llvm/test/Transforms/InstCombine/getelementptr.ll
index 7568c6edc429c..b7d263f5eb320 100644
--- a/llvm/test/Transforms/InstCombine/getelementptr.ll
+++ b/llvm/test/Transforms/InstCombine/getelementptr.ll
@@ -247,11 +247,11 @@ define <2 x i1> @test13_vector2(i64 %X, <2 x ptr> %P) nounwind {
define <2 x i1> @test13_fixed_fixed(i64 %X, ptr %P, <2 x i64> %y) nounwind {
; CHECK-LABEL: @test13_fixed_fixed(
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 0
-; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[DOTSPLATINSERT]], <i64 3, i64 0>
-; CHECK-NEXT: [[A_IDX:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[B_IDX:%.*]] = shl nsw <2 x i64> [[Y:%.*]], splat (i64 4)
-; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i64> [[A_IDX]], [[B_IDX]]
+; CHECK-NEXT: [[A1:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P:%.*]], i64 0, i64 [[X:%.*]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[A1]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x ptr> [[DOTSPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds <2 x i64>, ptr [[P]], <2 x i64> [[Y:%.*]]
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x ptr> [[DOTSPLAT]], [[B]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%A = getelementptr inbounds <2 x i64>, ptr %P, <2 x i64> zeroinitializer, i64 %X
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
index a240dfe7d271a..cbfdebc6cb412 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
@@ -525,9 +525,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {
define ptr @gep_splat_base_w_cv_idx(ptr %base) {
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
-; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
-; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -539,9 +537,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {
define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
; CHECK-LABEL: @gep_splat_base_w_vidx(
-; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
-; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -597,10 +594,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
}
define ptr @gep_splat_both(ptr %base, i64 %idx) {
; CHECK-LABEL: @gep_splat_both(
-; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
-; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> poison, ptr %base, i32 0
@@ -641,9 +635,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
;; indices.
define ptr @PR41624(<2 x ptr> %a) {
; CHECK-LABEL: @PR41624(
-; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
-; CHECK-NEXT: ret ptr [[R]]
+; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
+; CHECK-NEXT: [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
+; CHECK-NEXT: ret ptr [[R1]]
;
%w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
%r = extractelement <2 x ptr> %w, i32 0
@@ -657,8 +651,8 @@ define ptr @PR41624(<2 x ptr> %a) {
define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
; CHECK-LABEL: @zero_sized_type_extract(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
-; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
+; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
; CHECK-NEXT: ret ptr [[T2]]
;
bb:
diff --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index ee7ef9955e643..f0ef33dcc6b4e 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -528,9 +528,7 @@ define ptr @gep_splat_base_w_s_idx(ptr %base) {
define ptr @gep_splat_base_w_cv_idx(ptr %base) {
; CHECK-LABEL: @gep_splat_base_w_cv_idx(
-; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> <i64 poison, i64 1>
-; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 4
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -542,9 +540,8 @@ define ptr @gep_splat_base_w_cv_idx(ptr %base) {
define ptr @gep_splat_base_w_vidx(ptr %base, <2 x i64> %idxvec) {
; CHECK-LABEL: @gep_splat_base_w_vidx(
-; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
-; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i64> [[IDXVEC:%.*]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[TMP1]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -600,10 +597,7 @@ define ptr @gep_sbase_w_splat_idx(ptr %base, i64 %idx) {
}
define ptr @gep_splat_both(ptr %base, i64 %idx) {
; CHECK-LABEL: @gep_splat_both(
-; CHECK-NEXT: [[BASEVEC2:%.*]] = insertelement <2 x ptr> poison, ptr [[BASE:%.*]], i64 1
-; CHECK-NEXT: [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i64 1
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, <2 x ptr> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
-; CHECK-NEXT: [[EE:%.*]] = extractelement <2 x ptr> [[GEP]], i64 1
+; CHECK-NEXT: [[EE:%.*]] = getelementptr i32, ptr [[BASE:%.*]], i64 [[IDX:%.*]]
; CHECK-NEXT: ret ptr [[EE]]
;
%basevec1 = insertelement <2 x ptr> undef, ptr %base, i32 0
@@ -644,9 +638,9 @@ define ptr @gep_demanded_lane_undef(ptr %base, i64 %idx) {
;; indices.
define ptr @PR41624(<2 x ptr> %a) {
; CHECK-LABEL: @PR41624(
-; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> splat (i64 5), <2 x i32> zeroinitializer
-; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W]], i64 0
-; CHECK-NEXT: ret ptr [[R]]
+; CHECK-NEXT: [[R:%.*]] = extractelement <2 x ptr> [[W:%.*]], i64 0
+; CHECK-NEXT: [[R1:%.*]] = getelementptr i8, ptr [[R]], i64 40
+; CHECK-NEXT: ret ptr [[R1]]
;
%w = getelementptr { i32, i32 }, <2 x ptr> %a, <2 x i64> <i64 5, i64 5>, <2 x i32> zeroinitializer
%r = extractelement <2 x ptr> %w, i32 0
@@ -660,8 +654,8 @@ define ptr @PR41624(<2 x ptr> %a) {
define ptr @zero_sized_type_extract(<4 x i64> %arg, i64 %arg1) {
; CHECK-LABEL: @zero_sized_type_extract(
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[T:%.*]] = getelementptr inbounds [0 x i32], <4 x ptr> <ptr @global, ptr poison, ptr poison, ptr poison>, <4 x i64> <i64 0, i64 poison, i64 poison, i64 poison>, <4 x i64> [[ARG:%.*]]
-; CHECK-NEXT: [[T2:%.*]] = extractelement <4 x ptr> [[T]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG:%.*]], i64 0
+; CHECK-NEXT: [[T2:%.*]] = getelementptr inbounds [0 x i32], ptr @global, i64 0, i64 [[TMP0]]
; CHECK-NEXT: ret ptr [[T2]]
;
bb:
diff --git a/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
index 994ecc2246bb6..b882d7dfff91e 100644
--- a/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vector_gep1-inseltpoison.ll
@@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {
define <2 x ptr> @test7(<2 x ptr> %a) {
; CHECK-LABEL: @test7(
-; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
; CHECK-NEXT: ret <2 x ptr> [[W]]
;
%w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/InstCombine/vector_gep1.ll b/llvm/test/Transforms/InstCombine/vector_gep1.ll
index ab99ee71fc8db..92de12213bee1 100644
--- a/llvm/test/Transforms/InstCombine/vector_gep1.ll
+++ b/llvm/test/Transforms/InstCombine/vector_gep1.ll
@@ -55,7 +55,7 @@ define <2 x i1> @test5(<2 x ptr> %a) {
define <2 x ptr> @test7(<2 x ptr> %a) {
; CHECK-LABEL: @test7(
-; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[W:%.*]] = getelementptr { i32, i32 }, <2 x ptr> [[A:%.*]], <2 x i64> <i64 5, i64 9>, i32 0
; CHECK-NEXT: ret <2 x ptr> [[W]]
;
%w = getelementptr {i32, i32}, <2 x ptr> %a, <2 x i32> <i32 5, i32 9>, <2 x i32> zeroinitializer
More information about the llvm-commits
mailing list