[llvm] 843c0ad - [ConstantFold] Fold extractelement of getelementptr

Mon Oct 28 11:32:54 PDT 2019

Author: Jay Foad
Date: 2019-10-28T18:32:39Z
New Revision: 843c0adf0f7449a4167d20b399f70f6943d21d5e

URL: https://github.com/llvm/llvm-project/commit/843c0adf0f7449a4167d20b399f70f6943d21d5e
DIFF: https://github.com/llvm/llvm-project/commit/843c0adf0f7449a4167d20b399f70f6943d21d5e.diff

LOG: [ConstantFold] Fold extractelement of getelementptr

Summary:
Getelementptr has vector type if any of its operands are vectors
(the scalar operands being implicitly broadcast to all vector elements).
Extractelement applied to a vector getelementptr can be folded by
applying the extractelement in turn to all of the vector operands.

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69379

Added: 
    

Modified: 
    llvm/lib/IR/ConstantFold.cpp
    llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll
    llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
    llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/IR/ConstantFold.cpp b/llvm/lib/IR/ConstantFold.cpp
index 71fa795ec294..84b117de21e6 100644

--- a/llvm/lib/IR/ConstantFold.cpp
+++ b/llvm/lib/IR/ConstantFold.cpp
@@ -792,13 +792,36 @@ Constant *llvm::ConstantFoldExtractElementInstruction(Constant *Val,
   if (isa<UndefValue>(Val) || isa<UndefValue>(Idx))
     return UndefValue::get(Val->getType()->getVectorElementType());
 
-  if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Idx)) {
-    // ee({w,x,y,z}, wrong_value) -> undef
-    if (CIdx->uge(Val->getType()->getVectorNumElements()))
-      return UndefValue::get(Val->getType()->getVectorElementType());
-    return Val->getAggregateElement(CIdx->getZExtValue());
+  auto *CIdx = dyn_cast<ConstantInt>(Idx);
+  if (!CIdx)
+    return nullptr;
+
+  // ee({w,x,y,z}, wrong_value) -> undef
+  if (CIdx->uge(Val->getType()->getVectorNumElements()))
+    return UndefValue::get(Val->getType()->getVectorElementType());
+
+  // ee (gep (ptr, idx0, ...), idx) -> gep (ee (ptr, idx), ee (idx0, idx), ...)
+  if (auto *CE = dyn_cast<ConstantExpr>(Val)) {
+    if (CE->getOpcode() == Instruction::GetElementPtr) {
+      SmallVector<Constant *, 8> Ops;
+      Ops.reserve(CE->getNumOperands());
+      for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
+        Constant *Op = CE->getOperand(i);
+        if (Op->getType()->isVectorTy()) {
+          Constant *ScalarOp = ConstantFoldExtractElementInstruction(Op, Idx);
+          if (!ScalarOp)
+            return  nullptr;
+          Ops.push_back(ScalarOp);
+        } else
+          Ops.push_back(Op);
+      }
+      return CE->getWithOperands(Ops, CE->getType()->getVectorElementType(),
+                                 false,
+                                 Ops[0]->getType()->getPointerElementType());
+    }
   }
-  return nullptr;
+
+  return Val->getAggregateElement(CIdx);
 }
 
 Constant *llvm::ConstantFoldInsertElementInstruction(Constant *Val,

diff  --git a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll
index bb5fcbdb8ba5..03d27e9fb6c5 100644
--- a/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll
+++ b/llvm/test/Analysis/ConstantFolding/gep-zeroinit-vector.ll
@@ -9,7 +9,7 @@
 
 define <2 x i16*> @test_gep() {
 ; CHECK-LABEL: @test_gep(
-; CHECK-NEXT:    ret <2 x i16*> <i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i64> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>
+; CHECK-NEXT:    ret <2 x i16*> <i16* getelementptr inbounds ([1 x %rec8], [1 x %rec8]* @a, i32 0, i32 0, i32 0), i16* getelementptr inbounds ([1 x %rec8], [1 x %rec8]* @a, i32 0, i32 0, i32 0)>
 ;
   %A = getelementptr [1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer
   %B = bitcast <2 x %rec8*> %A to <2 x i16*>

diff  --git a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
index 164061de5112..0c3f60861430 100644
--- a/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/llvm/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -569,7 +569,7 @@ define i32* @gep_cvbase_w_s_idx(<2 x i32*> %base, i64 %raw_addr) {
 
 define i32* @gep_cvbase_w_cv_idx(<2 x i32*> %base, i64 %raw_addr) {
 ; CHECK-LABEL: @gep_cvbase_w_cv_idx(
-; CHECK-NEXT:    ret i32* extractelement (<2 x i32*> getelementptr (i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, <2 x i64> <i64 0, i64 1>), i32 1)
+; CHECK-NEXT:    ret i32* getelementptr inbounds (i32, i32* @GLOBAL, i64 1)
 ;
   %gep = getelementptr i32, <2 x i32*> <i32* @GLOBAL, i32* @GLOBAL>, <2 x i64> <i64 0, i64 1>
   %ee = extractelement <2 x i32*> %gep, i32 1

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
index a73d91ba30b7..360a52c619bc 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll
@@ -27,7 +27,7 @@ define void @f1() {
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [2 x i16*], [2 x i16*]* @b, i16 0, i64 [[TMP1]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i16*, i16** [[TMP2]], i32 0
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i16** [[TMP3]] to <2 x i16*>*
-; CHECK-NEXT:    store <2 x i16*> <i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 0), i32 0, i32 0), i16* getelementptr inbounds (%rec8, %rec8* extractelement (<2 x %rec8*> getelementptr ([1 x %rec8], [1 x %rec8]* @a, <2 x i16> zeroinitializer, <2 x i64> zeroinitializer), i32 1), i32 0, i32 0)>, <2 x i16*>* [[TMP4]], align 8
+; CHECK-NEXT:    store <2 x i16*> <i16* getelementptr inbounds ([1 x %rec8], [1 x %rec8]* @a, i32 0, i32 0, i32 0), i16* getelementptr inbounds ([1 x %rec8], [1 x %rec8]* @a, i32 0, i32 0, i32 0)>, <2 x i16*>* [[TMP4]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 2
 ; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0