[llvm] r352440 - Demanded elements support for vector GEPs

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 28 15:24:50 PST 2019


Author: reames
Date: Mon Jan 28 15:24:49 2019
New Revision: 352440

URL: http://llvm.org/viewvc/llvm-project?rev=352440&view=rev
Log:
Demanded elements support for vector GEPs

GEPs can produce either scalar or vector results. If we're extracting only a subset of the vector lanes, simplifying the operands is helpful in eliminating redundant computation and (eventually) allowing further optimizations.

Differential Revision: https://reviews.llvm.org/D57177


Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
    llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp?rev=352440&r1=352439&r2=352440&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp Mon Jan 28 15:24:49 2019
@@ -1183,6 +1183,18 @@ Value *InstCombiner::SimplifyDemandedVec
   switch (I->getOpcode()) {
   default: break;
 
+  case Instruction::GetElementPtr: {
+    // Conservatively track the demanded elements back through any vector
+    // operands we may have.  We know there must be at least one, or we
+    // wouldn't have a vector result to get here. Note that we intentionally
+    // merge the undef bits here since gepping with either an undef base or
+    // index results in undef. 
+    for (unsigned i = 0; i < I->getNumOperands(); i++)
+      if (I->getOperand(i)->getType()->isVectorTy())
+        simplifyAndSetOp(I, i, DemandedElts, UndefElts);
+
+    break;
+  }
   case Instruction::InsertElement: {
     // If this is a variable index, we don't know which element it overwrites.
     // demand exactly the same input as we produce.

Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=352440&r1=352439&r2=352440&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Mon Jan 28 15:24:49 2019
@@ -512,8 +512,7 @@ define i32* @gep_vbase_w_s_idx(<2 x i32*
 
 define i32* @gep_splat_base_w_s_idx(i32* %base) {
 ; CHECK-LABEL: @gep_splat_base_w_s_idx(
-; CHECK-NEXT:    [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], i64 1
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]
@@ -528,9 +527,8 @@ define i32* @gep_splat_base_w_s_idx(i32*
 
 define i32* @gep_splat_base_w_cv_idx(i32* %base) {
 ; CHECK-LABEL: @gep_splat_base_w_cv_idx(
-; CHECK-NEXT:    [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> <i64 undef, i64 1>
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
@@ -543,8 +541,7 @@ define i32* @gep_splat_base_w_cv_idx(i32
 
 define i32* @gep_splat_base_w_vidx(i32* %base, <2 x i64> %idxvec) {
 ; CHECK-LABEL: @gep_splat_base_w_vidx(
-; CHECK-NEXT:    [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC:%.*]]
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]
@@ -561,7 +558,7 @@ define i32* @gep_splat_base_w_vidx(i32*
 
 define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) {
 ; CHECK-LABEL: @gep_cvbase_w_s_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, i64 [[RAW_ADDR:%.*]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> <i32* undef, i32* @GLOBAL>, i64 [[RAW_ADDR:%.*]]
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
@@ -582,7 +579,7 @@ define i32* @gep_cvbase_w_cv_idx(<2 x i3
 
 define i32* @gep_sbase_w_cv_idx(i32* %base) {
 ; CHECK-LABEL: @gep_sbase_w_cv_idx(
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> <i64 0, i64 1>
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> <i64 undef, i64 1>
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]
 ;
@@ -593,8 +590,7 @@ define i32* @gep_sbase_w_cv_idx(i32* %ba
 
 define i32* @gep_sbase_w_splat_idx(i32* %base, i64 %idx) {
 ; CHECK-LABEL: @gep_sbase_w_splat_idx(
-; CHECK-NEXT:    [[IDXVEC1:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 0
-; CHECK-NEXT:    [[IDXVEC2:%.*]] = shufflevector <2 x i64> [[IDXVEC1]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <2 x i64> [[IDXVEC2]]
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]
@@ -607,10 +603,8 @@ define i32* @gep_sbase_w_splat_idx(i32*
 }
 define i32* @gep_splat_both(i32* %base, i64 %idx) {
 ; CHECK-LABEL: @gep_splat_both(
-; CHECK-NEXT:    [[BASEVEC1:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 0
-; CHECK-NEXT:    [[BASEVEC2:%.*]] = shufflevector <2 x i32*> [[BASEVEC1]], <2 x i32*> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    [[IDXVEC1:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 0
-; CHECK-NEXT:    [[IDXVEC2:%.*]] = shufflevector <2 x i64> [[IDXVEC1]], <2 x i64> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[BASEVEC2:%.*]] = insertelement <2 x i32*> undef, i32* [[BASE:%.*]], i32 1
+; CHECK-NEXT:    [[IDXVEC2:%.*]] = insertelement <2 x i64> undef, i64 [[IDX:%.*]], i32 1
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, <2 x i32*> [[BASEVEC2]], <2 x i64> [[IDXVEC2]]
 ; CHECK-NEXT:    [[EE:%.*]] = extractelement <2 x i32*> [[GEP]], i32 1
 ; CHECK-NEXT:    ret i32* [[EE]]




More information about the llvm-commits mailing list