[llvm] de65b35 - [VectorCombine] add/use pass-level IRBuilder

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 22 06:01:48 PDT 2020


Author: Sanjay Patel
Date: 2020-06-22T09:01:29-04:00
New Revision: de65b356dc2d9730b02ac21dd308cdc3ced0feac

URL: https://github.com/llvm/llvm-project/commit/de65b356dc2d9730b02ac21dd308cdc3ced0feac
DIFF: https://github.com/llvm/llvm-project/commit/de65b356dc2d9730b02ac21dd308cdc3ced0feac.diff

LOG: [VectorCombine] add/use pass-level IRBuilder

This saves creating/destroying a builder every time we
perform some transform.

The tests show instruction ordering diffs resulting from
always inserting at the root instruction now, but those
should be benign.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp
    llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
    llvm/test/Transforms/VectorCombine/X86/extract-binop.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index cb7027884dd9..3935400113f1 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -51,12 +51,13 @@ class VectorCombine {
 public:
   VectorCombine(Function &F, const TargetTransformInfo &TTI,
                 const DominatorTree &DT)
-      : F(F), TTI(TTI), DT(DT) {}
+      : F(F), Builder(F.getContext()), TTI(TTI), DT(DT) {}
 
   bool run();
 
 private:
   Function &F;
+  IRBuilder<> Builder;
   const TargetTransformInfo &TTI;
   const DominatorTree &DT;
 
@@ -64,6 +65,12 @@ class VectorCombine {
                              unsigned Opcode,
                              ExtractElementInst *&ConvertToShuffle,
                              unsigned PreferredExtractIndex);
+  ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
+                                       unsigned NewIndex);
+  void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+                     Instruction &I);
+  void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+                       Instruction &I);
   bool foldExtractExtract(Instruction &I);
   bool foldBitcastShuf(Instruction &I);
   bool scalarizeBinopOrCmp(Instruction &I);
@@ -182,12 +189,13 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
 /// the source vector (shift the scalar element) to a NewIndex for extraction.
 /// Return null if the input can be constant folded, so that we are not creating
 /// unnecessary instructions.
-static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
-                                            unsigned NewIndex) {
+ExtractElementInst *VectorCombine::translateExtract(ExtractElementInst *ExtElt,
+                                                    unsigned NewIndex) {
   // If the extract can be constant-folded, this code is unsimplified. Defer
   // to other passes to handle that.
   Value *X = ExtElt->getVectorOperand();
   Value *C = ExtElt->getIndexOperand();
+  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
   if (isa<Constant>(X))
     return nullptr;
 
@@ -196,11 +204,9 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
   // ShufMask = { 2, undef, undef, undef }
   auto *VecTy = cast<FixedVectorType>(X->getType());
   SmallVector<int, 32> Mask(VecTy->getNumElements(), -1);
-  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
   Mask[NewIndex] = cast<ConstantInt>(C)->getZExtValue();
 
   // extelt X, C --> extelt (shuffle X), NewIndex
-  IRBuilder<> Builder(ExtElt);
   Value *Shuf =
       Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask, "shift");
   return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
@@ -209,8 +215,8 @@ static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
 /// Try to reduce extract element costs by converting scalar compares to vector
 /// compares followed by extract.
 /// cmp (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                          Instruction &I) {
+void VectorCombine::foldExtExtCmp(ExtractElementInst *Ext0,
+                                  ExtractElementInst *Ext1, Instruction &I) {
   assert(isa<CmpInst>(&I) && "Expected a compare");
   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
@@ -218,7 +224,6 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 
   // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
   ++NumVecCmp;
-  IRBuilder<> Builder(&I);
   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
@@ -230,8 +235,8 @@ static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 /// Try to reduce extract element costs by converting scalar binops to vector
 /// binops followed by extract.
 /// bo (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-                            Instruction &I) {
+void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
+                                    ExtractElementInst *Ext1, Instruction &I) {
   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
   assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
              cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
@@ -239,7 +244,6 @@ static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
 
   // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
   ++NumVecBO;
-  IRBuilder<> Builder(&I);
   Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecBO =
       Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
@@ -353,7 +357,6 @@ bool VectorCombine::foldBitcastShuf(Instruction &I) {
   }
   // bitcast (shuf V, MaskC) --> shuf (bitcast V), MaskC'
   ++NumShufOfBitcast;
-  IRBuilder<> Builder(&I);
   Value *CastV = Builder.CreateBitCast(V, DestTy);
   Value *Shuf =
       Builder.CreateShuffleVector(CastV, UndefValue::get(DestTy), NewMask);
@@ -454,7 +457,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
     ++NumScalarBO;
 
   // For constant cases, extract the scalar element, this should constant fold.
-  IRBuilder<> Builder(&I);
   if (IsConst0)
     V0 = ConstantExpr::getExtractElement(VecC0, Builder.getInt64(Index));
   if (IsConst1)
@@ -498,6 +500,7 @@ bool VectorCombine::run() {
     for (Instruction &I : BB) {
       if (isa<DbgInfoIntrinsic>(I))
         continue;
+      Builder.SetInsertPoint(&I);
       MadeChange |= foldExtractExtract(I);
       MadeChange |= foldBitcastShuf(I);
       MadeChange |= scalarizeBinopOrCmp(I);

diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
index a3280a870893..1cceb48da206 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll
@@ -41,11 +41,11 @@ define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
 
 define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y:%.*]]
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[SHIFT1]]
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[TMP2]], [[SHIFT2]]
 ; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i32 0
 ; CHECK-NEXT:    ret i32 [[X2Y210]]

diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index af33850b8953..ac7ca1e8f8e2 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -454,12 +454,12 @@ define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
 define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @PR34724(
 ; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 3, i32 undef>
-; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
 ; CHECK-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
 ; CHECK-NEXT:    [[B01:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> undef, <4 x i32> <i32 undef, i32 undef, i32 undef, i32 2>
 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
 ; CHECK-NEXT:    [[B23:%.*]] = extractelement <4 x float> [[TMP3]], i64 3
 ; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> undef, float [[A23]], i32 1


        


More information about the llvm-commits mailing list