[llvm] 9c766f4 - [InstCombine] Fold extractelement + vector GEP with one use

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Wed May 26 01:54:38 PDT 2021


Author: David Sherwood
Date: 2021-05-26T09:54:26+01:00
New Revision: 9c766f4090d19e3e2f56e87164177f8c3eba4b96

URL: https://github.com/llvm/llvm-project/commit/9c766f4090d19e3e2f56e87164177f8c3eba4b96
DIFF: https://github.com/llvm/llvm-project/commit/9c766f4090d19e3e2f56e87164177f8c3eba4b96.diff

LOG: [InstCombine] Fold extractelement + vector GEP with one use

We sometimes see code like this:

Case 1:
  %gep = getelementptr i32, i32* %a, <2 x i64> %splat
  %ext = extractelement <2 x i32*> %gep, i32 0

or this:

Case 2:
  %gep = getelementptr i32, <4 x i32*> %a, i64 1
  %ext = extractelement <4 x i32*> %gep, i32 0

where there is only one use of the GEP. In such cases it makes
sense to fold the two together such that we create a scalar GEP:

Case 1:
  %ext = extractelement <2 x i64> %splat, i32 0
  %gep = getelementptr i32, i32* %a, i64 %ext

Case 2:
  %ext = extractelement <4 x i32*> %a, i32 0
  %gep = getelementptr i32, i32* %ext, i64 1

This may create further folding opportunities as a result, e.g.
the extract of a splat vector can be completely eliminated. Also,
even for the general case where the vector operand is not a splat
it seems beneficial to create a scalar GEP and extract the scalar
element from the operand. Therefore, in this patch I've assumed
that a scalar GEP is always preferable to a vector GEP and have
added code to unconditionally fold the extract + GEP.

I haven't added folds for the case when we have both a vector of
pointers and a vector of indices, since this would require
generating an additional extractelement operation.

Tests have been added here:

  Transforms/InstCombine/gep-vector-indices.ll

Differential Revision: https://reviews.llvm.org/D101900

Added: 
    llvm/test/Transforms/InstCombine/gep-vector-indices.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
    llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
    llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
    llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
    llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index c5192dc46319b..dfe15bb860aca 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -430,6 +430,47 @@ Instruction *InstCombinerImpl::visitExtractElementInst(ExtractElementInst &EI) {
       // be the same value, extract from the pre-inserted value instead.
       if (isa<Constant>(IE->getOperand(2)) && IndexC)
         return replaceOperand(EI, 0, IE->getOperand(0));
+    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
+      auto *VecType = cast<VectorType>(GEP->getType());
+      ElementCount EC = VecType->getElementCount();
+      uint64_t IdxVal = IndexC ? IndexC->getZExtValue() : 0;
+      if (IndexC && IdxVal < EC.getKnownMinValue() && GEP->hasOneUse()) {
+        // Find out why we have a vector result - these are a few examples:
+        //  1. We have a scalar pointer and a vector of indices, or
+        //  2. We have a vector of pointers and a scalar index, or
+        //  3. We have a vector of pointers and a vector of indices, etc.
+        // Here we only consider combining when there is exactly one vector
+        // operand, since the optimization is less obviously a win due to
+        // needing more than one extractelements.
+
+        unsigned VectorOps =
+            llvm::count_if(GEP->operands(), [](const Value *V) {
+              return isa<VectorType>(V->getType());
+            });
+        if (VectorOps > 1)
+          return nullptr;
+        assert(VectorOps == 1 && "Expected exactly one vector GEP operand!");
+
+        Value *NewPtr = GEP->getPointerOperand();
+        if (isa<VectorType>(NewPtr->getType()))
+          NewPtr = Builder.CreateExtractElement(NewPtr, IndexC);
+
+        SmallVector<Value *> NewOps;
+        for (unsigned I = 1; I != GEP->getNumOperands(); ++I) {
+          Value *Op = GEP->getOperand(I);
+          if (auto *OpTy = dyn_cast<VectorType>(Op->getType()))
+            NewOps.push_back(Builder.CreateExtractElement(Op, IndexC));
+          else
+            NewOps.push_back(Op);
+        }
+
+        GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
+            cast<PointerType>(NewPtr->getType())->getElementType(), NewPtr,
+            NewOps);
+        NewGEP->setIsInBounds(GEP->isInBounds());
+        return NewGEP;
+      }
+      return nullptr;
     } else if (auto *SVI = dyn_cast<ShuffleVectorInst>(I)) {
       // If this is extracting an element from a shufflevector, figure out where
       // it came from and extract from the appropriate input element instead.

diff  --git a/llvm/test/Transforms/InstCombine/gep-vector-indices.ll b/llvm/test/Transforms/InstCombine/gep-vector-indices.ll
new file mode 100644
index 0000000000000..e0830f24fd959
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/gep-vector-indices.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine %s -S | FileCheck %s
+
+define i32* @vector_splat_indices_v2i64_ext0(i32* %a) {
+; CHECK-LABEL: @vector_splat_indices_v2i64_ext0(
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 4
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %gep = getelementptr i32, i32* %a, <2 x i64> <i64 4, i64 4>
+  %res = extractelement <2 x i32*> %gep, i32 0
+  ret i32* %res
+}
+
+define i32* @vector_splat_indices_nxv2i64_ext0(i32* %a) {
+; CHECK-LABEL: @vector_splat_indices_nxv2i64_ext0(
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 extractelement (<vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), i32 0)
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %tmp = insertelement <vscale x 2 x i64> poison, i64 4, i32 0
+  %splatof4 = shufflevector <vscale x 2 x i64> %tmp, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+  %gep = getelementptr inbounds i32, i32* %a, <vscale x 2 x i64> %splatof4
+  %res = extractelement <vscale x 2 x i32*> %gep, i32 0
+  ret i32* %res
+}
+
+define i32* @vector_indices_v2i64_ext0(i32* %a, <2 x i64> %indices) {
+; CHECK-LABEL: @vector_indices_v2i64_ext0(
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <2 x i64> [[INDICES:%.*]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %gep = getelementptr i32, i32* %a, <2 x i64> %indices
+  %res = extractelement <2 x i32*> %gep, i32 0
+  ret i32* %res
+}
+
+define i32* @vector_indices_nxv1i64_ext0(i32* %a, <vscale x 1 x i64> %indices) {
+; CHECK-LABEL: @vector_indices_nxv1i64_ext0(
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <vscale x 1 x i64> [[INDICES:%.*]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP0]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %gep = getelementptr i32, i32* %a, <vscale x 1 x i64> %indices
+  %res = extractelement <vscale x 1 x i32*> %gep, i32 0
+  ret i32* %res
+}
+
+
+define i32* @vector_splat_ptrs_v2i64_ext0(i32* %a, i64 %index) {
+; CHECK-LABEL: @vector_splat_ptrs_v2i64_ext0(
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[INDEX:%.*]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %tmp = insertelement <2 x i32*> poison, i32* %a, i32 0
+  %splatofa = shufflevector <2 x i32*> %tmp, <2 x i32*> poison, <2 x i32> zeroinitializer
+  %gep = getelementptr i32, <2 x i32*> %splatofa, i64 %index
+  %res = extractelement <2 x i32*> %gep, i32 0
+  ret i32* %res
+}
+
+
+define i32* @vector_splat_ptrs_nxv2i64_ext0(i32* %a, i64 %index) {
+; CHECK-LABEL: @vector_splat_ptrs_nxv2i64_ext0(
+; CHECK-NEXT:    [[TMP:%.*]] = insertelement <vscale x 2 x i32*> poison, i32* [[A:%.*]], i32 0
+; CHECK-NEXT:    [[SPLATOFA:%.*]] = shufflevector <vscale x 2 x i32*> [[TMP]], <vscale x 2 x i32*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP0:%.*]] = extractelement <vscale x 2 x i32*> [[SPLATOFA]], i32 0
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr i32, i32* [[TMP0]], i64 [[INDEX:%.*]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %tmp = insertelement <vscale x 2 x i32*> poison, i32* %a, i32 0
+  %splatofa = shufflevector <vscale x 2 x i32*> %tmp, <vscale x 2 x i32*> poison, <vscale x 2 x i32> zeroinitializer
+  %gep = getelementptr i32, <vscale x 2 x i32*> %splatofa, i64 %index
+  %res = extractelement <vscale x 2 x i32*> %gep, i32 0
+  ret i32* %res
+}
+
+
+define float* @vector_struct1_splat_indices_v4i64_ext1({float, float}* %a) {
+; CHECK-LABEL: @vector_struct1_splat_indices_v4i64_ext1(
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr { float, float }, { float, float }* [[A:%.*]], i64 4, i32 0
+; CHECK-NEXT:    ret float* [[RES]]
+;
+  %gep = getelementptr {float, float}, {float, float}* %a, <4 x i32> <i32 4, i32 4, i32 4, i32 4>, i32 0
+  %res = extractelement <4 x float*> %gep, i32 1
+  ret float* %res
+}
+
+
+define float* @vector_struct2_splat_indices_v4i64_ext1({float, [8 x float]}* %a) {
+; CHECK-LABEL: @vector_struct2_splat_indices_v4i64_ext1(
+; CHECK-NEXT:    [[RES:%.*]] = getelementptr { float, [8 x float] }, { float, [8 x float] }* [[A:%.*]], i64 2, i32 1, i64 4
+; CHECK-NEXT:    ret float* [[RES]]
+;
+  %gep = getelementptr {float, [8 x float]}, {float, [8 x float]}* %a, i32 2, i32 1, <4 x i32> <i32 4, i32 4, i32 4, i32 4>
+  %res = extractelement <4 x float*> %gep, i32 1
+  ret float* %res
+}
+
+
+; Negative tests
+
+define i32* @vector_indices_nxv2i64_ext3(i32* %a, <vscale x 2 x i64> %indices) {
+; CHECK-LABEL: @vector_indices_nxv2i64_ext3(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], <vscale x 2 x i64> [[INDICES:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = extractelement <vscale x 2 x i32*> [[GEP]], i32 3
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %gep = getelementptr i32, i32* %a, <vscale x 2 x i64> %indices
+  %res = extractelement <vscale x 2 x i32*> %gep, i32 3
+  ret i32* %res
+}
+
+define i32* @vector_indices_nxv2i64_extN(i32* %a, <vscale x 2 x i64> %indices, i32 %N) {
+; CHECK-LABEL: @vector_indices_nxv2i64_extN(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], <vscale x 2 x i64> [[INDICES:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = extractelement <vscale x 2 x i32*> [[GEP]], i32 [[N:%.*]]
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %gep = getelementptr i32, i32* %a, <vscale x 2 x i64> %indices
+  %res = extractelement <vscale x 2 x i32*> %gep, i32 %N
+  ret i32* %res
+}
+
+define void @vector_indices_nxv2i64_mulitple_use(i32* %a, <vscale x 2 x i64> %indices, i32** %b, i32** %c) {
+; CHECK-LABEL: @vector_indices_nxv2i64_mulitple_use(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[A:%.*]], <vscale x 2 x i64> [[INDICES:%.*]]
+; CHECK-NEXT:    [[LANE0:%.*]] = extractelement <vscale x 2 x i32*> [[GEP]], i32 0
+; CHECK-NEXT:    [[LANE1:%.*]] = extractelement <vscale x 2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    store i32* [[LANE0]], i32** [[B:%.*]], align 8
+; CHECK-NEXT:    store i32* [[LANE1]], i32** [[C:%.*]], align 8
+; CHECK-NEXT:    ret void
+;
+  %gep = getelementptr i32, i32* %a, <vscale x 2 x i64> %indices
+  %lane0 = extractelement <vscale x 2 x i32*> %gep, i32 0
+  %lane1 = extractelement <vscale x 2 x i32*> %gep, i32 1
+  store i32* %lane0, i32** %b, align 8
+  store i32* %lane1, i32** %c, align 8
+  ret void
+}
+
+define i32* @vector_ptrs_and_indices_ext0(<vscale x 4 x i32*> %a, <vscale x 4 x i64> %indices) {
+; CHECK-LABEL: @vector_ptrs_and_indices_ext0(
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <vscale x 4 x i32*> [[A:%.*]], <vscale x 4 x i64> [[INDICES:%.*]]
+; CHECK-NEXT:    [[RES:%.*]] = extractelement <vscale x 4 x i32*> [[GEP]], i32 0
+; CHECK-NEXT:    ret i32* [[RES]]
+;
+  %gep = getelementptr i32, <vscale x 4 x i32*> %a, <vscale x 4 x i64> %indices
+  %res = extractelement <vscale x 4 x i32*> %gep, i32 0
+  ret i32* %res
+}

diff  --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
index afa9d9cd1c7b8..b18e930dde187 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll
@@ -499,13 +499,10 @@ define <3 x float> @shuf_frem_const_op1(<3 x float> %x) {
   ret <3 x float> %r
 }
 
-;; TODO: getelementptr tests below show missing simplifications for
-;; vector demanded elements on vector geps.
-
 define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) {
 ; CHECK-LABEL: @gep_vbase_w_s_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 [[INDEX:%.*]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 %index
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %gep = getelementptr i32, <2 x i32*> %base, i64 %index
@@ -515,9 +512,7 @@ define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) {
 
 define i32* @gep_splat_base_w_s_idx(i32* %base) {
 ; CHECK-LABEL: @gep_splat_base_w_s_idx(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x i32*> poison, i32* [[BASE:%.*]], i32 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %basevec1 = insertelement <2 x i32*> poison, i32* %base, i32 0
@@ -561,8 +556,7 @@ define i32* @gep_splat_base_w_vidx(i32* %base, <2 x i64> %idxvec) {
 
 define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) {
 ; CHECK-LABEL: @gep_cvbase_w_s_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> <i32* poison, i32* @GLOBAL>, i64 [[RAW_ADDR:%.*]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]]
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, i64 %raw_addr
@@ -582,8 +576,7 @@ define i32* @gep_cvbase_w_cv_idx(<2 x i32*> %base, i64 %raw_addr) {
 
 define i32* @gep_sbase_w_cv_idx(i32* %base) {
 ; CHECK-LABEL: @gep_sbase_w_cv_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> <i64 poison, i64 1>
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %gep = getelementptr i32, i32* %base, <2 x i64> <i64 0, i64 1>
@@ -593,9 +586,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) {
 
 define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) {
 ; CHECK-LABEL: @gep_sbase_w_splat_idx(
-; CHECK-NEXT:    [[IDXVEC2:%.*]] = insertelement <2 x i64> poison, i64 [[IDX:%.*]], i32 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %idxvec1 = insertelement <2 x i64> poison, i64 %idx, i32 0

diff  --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index f0c46586b554d..46c41d0e26955 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -499,13 +499,10 @@ define <3 x float> @shuf_frem_const_op1(<3 x float> %x) {
   ret <3 x float> %r
 }
 
-;; TODO: getelementptr tests below show missing simplifications for
-;; vector demanded elements on vector geps.
-
 define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) {
 ; CHECK-LABEL: @gep_vbase_w_s_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASE:%.*]], i64 [[INDEX:%.*]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32*> [[BASE:%.*]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[TMP1]], i64 %index
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %gep = getelementptr i32, <2 x i32*> %base, i64 %index
@@ -515,9 +512,7 @@ define i32* @gep_vbase_w_s_idx(<2 x i32*> %base, i64 %index) {
 
 define i32* @gep_splat_base_w_s_idx(i32* %base) {
 ; CHECK-LABEL: @gep_splat_base_w_s_idx(
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %basevec1 = insertelement <2 x i32*> undef, i32* %base, i32 0
@@ -561,8 +556,7 @@ define i32* @gep_splat_base_w_vidx(i32* %base, <2 x i64> %idxvec) {
 
 define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) {
 ; CHECK-LABEL: @gep_cvbase_w_s_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> <i32* poison, i32* @GLOBAL>, i64 [[RAW_ADDR:%.*]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* @GLOBAL, i64 [[RAW_ADDR:%.*]]
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, i64 %raw_addr
@@ -582,8 +576,7 @@ define i32* @gep_cvbase_w_cv_idx(<2 x i32*> %base, i64 %raw_addr) {
 
 define i32* @gep_sbase_w_cv_idx(i32* %base) {
 ; CHECK-LABEL: @gep_sbase_w_cv_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> <i64 poison, i64 1>
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %gep = getelementptr i32, i32* %base, <2 x i64> <i64 0, i64 1>
@@ -593,9 +586,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %base) {
 
 define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) {
 ; CHECK-LABEL: @gep_sbase_w_splat_idx(
-; CHECK-NEXT:    [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]]
-; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
+; CHECK-NEXT:    [[EE:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[IDX:%.*]]
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
   %idxvec1 = insertelement <2 x i64> undef, i64 %idx, i32 0

diff  --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
index 788d79262fa34..4250a3911b823 100644
--- a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg-inseltpoison.ll
@@ -3,9 +3,9 @@
 
 define <4 x i16*> @PR41270([4 x i16]* %x) {
 ; CHECK-LABEL: @PR41270(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3
-; CHECK-NEXT:    ret <4 x i16*> [[TMP2]]
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* [[X:%.*]], i64 0, i64 3
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x i16*> poison, i16* [[T3]], i32 0
+; CHECK-NEXT:    ret <4 x i16*> [[INS2]]
 ;
   %ins = insertelement <4 x [4 x i16]*> poison, [4 x i16]* %x, i32 0
   %splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> poison, <4 x i32> zeroinitializer

diff  --git a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
index 33ed7cb6a7fde..d042d3bbc127f 100644
--- a/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
+++ b/llvm/test/Transforms/InstCombine/vec_gep_scalar_arg.ll
@@ -3,9 +3,9 @@
 
 define <4 x i16*> @PR41270([4 x i16]* %x) {
 ; CHECK-LABEL: @PR41270(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x [4 x i16]*> undef, [4 x i16]* [[X:%.*]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i16], <4 x [4 x i16]*> [[TMP1]], i64 0, i64 3
-; CHECK-NEXT:    ret <4 x i16*> [[TMP2]]
+; CHECK-NEXT:    [[T3:%.*]] = getelementptr inbounds [4 x i16], [4 x i16]* [[X:%.*]], i64 0, i64 3
+; CHECK-NEXT:    [[INS2:%.*]] = insertelement <4 x i16*> undef, i16* [[T3]], i32 0
+; CHECK-NEXT:    ret <4 x i16*> [[INS2]]
 ;
   %ins = insertelement <4 x [4 x i16]*> undef, [4 x i16]* %x, i32 0
   %splat = shufflevector <4 x [4 x i16]*> %ins, <4 x [4 x i16]*> undef, <4 x i32> zeroinitializer


        


More information about the llvm-commits mailing list