[llvm] 216a37b - [VectorCombine] refactor extract-extract logic; NFCI

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 19 12:01:57 PDT 2020


Author: Sanjay Patel
Date: 2020-06-19T14:52:27-04:00
New Revision: 216a37bb4643279b548e85d98618a69475f1328c

URL: https://github.com/llvm/llvm-project/commit/216a37bb4643279b548e85d98618a69475f1328c
DIFF: https://github.com/llvm/llvm-project/commit/216a37bb4643279b548e85d98618a69475f1328c.diff

LOG: [VectorCombine] refactor extract-extract logic; NFCI

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 2ea19493a69e..a133d5433a57 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -53,10 +53,10 @@ static cl::opt<bool> DisableBinopExtractShuffle(
 /// instructions are cheaper than a vector alternative. Otherwise, return false
 /// and if one of the extracts should be transformed to a shufflevector, set
 /// \p ConvertToShuffle to that extract instruction.
-static bool isExtractExtractCheap(Instruction *Ext0, Instruction *Ext1,
-                                  unsigned Opcode,
+static bool isExtractExtractCheap(ExtractElementInst *Ext0,
+                                  ExtractElementInst *Ext1, unsigned Opcode,
                                   const TargetTransformInfo &TTI,
-                                  Instruction *&ConvertToShuffle,
+                                  ExtractElementInst *&ConvertToShuffle,
                                   unsigned PreferredExtractIndex) {
   assert(isa<ConstantInt>(Ext0->getOperand(1)) &&
          isa<ConstantInt>(Ext1->getOperand(1)) &&
@@ -157,34 +157,67 @@ static bool isExtractExtractCheap(Instruction *Ext0, Instruction *Ext1,
   return OldCost < NewCost;
 }
 
+/// Given an extract element instruction with constant index operand, shuffle
+/// the source vector (shift the scalar element) to a NewIndex for extraction.
+/// Return null if the input can be constant folded, so that we are not creating
+/// unnecessary instructions.
+static ExtractElementInst *translateExtract(ExtractElementInst *ExtElt,
+                                            unsigned NewIndex) {
+  // If the extract can be constant-folded, this code is unsimplified. Defer
+  // to other passes to handle that.
+  Value *X = ExtElt->getVectorOperand();
+  Value *C = ExtElt->getIndexOperand();
+  if (isa<Constant>(X))
+    return nullptr;
+
+  // The shuffle mask is undefined except for 1 lane that is being translated
+  // to the cheap extraction lane. Example:
+  // ShufMask = { 2, undef, undef, undef }
+  auto *VecTy = cast<FixedVectorType>(X->getType());
+  SmallVector<int, 32> Mask(VecTy->getNumElements(), -1);
+  assert(isa<ConstantInt>(C) && "Expected a constant index operand");
+  Mask[NewIndex] = cast<ConstantInt>(C)->getZExtValue();
+
+  // extelt X, C --> extelt (shuffle X), NewIndex
+  IRBuilder<> Builder(ExtElt);
+  Value *Shuf = Builder.CreateShuffleVector(X, UndefValue::get(VecTy), Mask);
+  return cast<ExtractElementInst>(Builder.CreateExtractElement(Shuf, NewIndex));
+}
+
 /// Try to reduce extract element costs by converting scalar compares to vector
 /// compares followed by extract.
 /// cmp (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtCmp(Instruction *Ext0, Instruction *Ext1,
+static void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                           Instruction &I) {
   assert(isa<CmpInst>(&I) && "Expected a compare");
+  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
+             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
+         "Expected matching constant extract indexes");
 
   // cmp Pred (extelt V0, C), (extelt V1, C) --> extelt (cmp Pred V0, V1), C
   ++NumVecCmp;
   IRBuilder<> Builder(&I);
   CmpInst::Predicate Pred = cast<CmpInst>(&I)->getPredicate();
-  Value *V0 = Ext0->getOperand(0), *V1 = Ext1->getOperand(0);
+  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecCmp = Builder.CreateCmp(Pred, V0, V1);
-  Value *Extract = Builder.CreateExtractElement(VecCmp, Ext0->getOperand(1));
-  I.replaceAllUsesWith(Extract);
+  Value *NewExt = Builder.CreateExtractElement(VecCmp, Ext0->getIndexOperand());
+  I.replaceAllUsesWith(NewExt);
 }
 
 /// Try to reduce extract element costs by converting scalar binops to vector
 /// binops followed by extract.
 /// bo (ext0 V0, C), (ext1 V1, C)
-static void foldExtExtBinop(Instruction *Ext0, Instruction *Ext1,
+static void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                             Instruction &I) {
   assert(isa<BinaryOperator>(&I) && "Expected a binary operator");
+  assert(cast<ConstantInt>(Ext0->getIndexOperand())->getZExtValue() ==
+             cast<ConstantInt>(Ext1->getIndexOperand())->getZExtValue() &&
+         "Expected matching constant extract indexes");
 
   // bo (extelt V0, C), (extelt V1, C) --> extelt (bo V0, V1), C
   ++NumVecBO;
   IRBuilder<> Builder(&I);
-  Value *V0 = Ext0->getOperand(0), *V1 = Ext1->getOperand(0);
+  Value *V0 = Ext0->getVectorOperand(), *V1 = Ext1->getVectorOperand();
   Value *VecBO =
       Builder.CreateBinOp(cast<BinaryOperator>(&I)->getOpcode(), V0, V1);
 
@@ -193,8 +226,8 @@ static void foldExtExtBinop(Instruction *Ext0, Instruction *Ext1,
   if (auto *VecBOInst = dyn_cast<Instruction>(VecBO))
     VecBOInst->copyIRFlags(&I);
 
-  Value *Extract = Builder.CreateExtractElement(VecBO, Ext0->getOperand(1));
-  I.replaceAllUsesWith(Extract);
+  Value *NewExt = Builder.CreateExtractElement(VecBO, Ext0->getIndexOperand());
+  I.replaceAllUsesWith(NewExt);
 }
 
 /// Match an instruction with extracted vector operands.
@@ -204,16 +237,16 @@ static bool foldExtractExtract(Instruction &I, const TargetTransformInfo &TTI) {
   if (!isSafeToSpeculativelyExecute(&I))
     return false;
 
-  Instruction *Ext0, *Ext1;
+  Instruction *I0, *I1;
   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
-  if (!match(&I, m_Cmp(Pred, m_Instruction(Ext0), m_Instruction(Ext1))) &&
-      !match(&I, m_BinOp(m_Instruction(Ext0), m_Instruction(Ext1))))
+  if (!match(&I, m_Cmp(Pred, m_Instruction(I0), m_Instruction(I1))) &&
+      !match(&I, m_BinOp(m_Instruction(I0), m_Instruction(I1))))
     return false;
 
   Value *V0, *V1;
   uint64_t C0, C1;
-  if (!match(Ext0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
-      !match(Ext1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
+  if (!match(I0, m_ExtractElt(m_Value(V0), m_ConstantInt(C0))) ||
+      !match(I1, m_ExtractElt(m_Value(V1), m_ConstantInt(C1))) ||
       V0->getType() != V1->getType())
     return false;
 
@@ -222,40 +255,28 @@ static bool foldExtractExtract(Instruction &I, const TargetTransformInfo &TTI) {
   // reduced to a "select shuffle".
   // TODO: If we add a larger pattern match that starts from an insert, this
   //       probably becomes unnecessary.
+  auto *Ext0 = cast<ExtractElementInst>(I0);
+  auto *Ext1 = cast<ExtractElementInst>(I1);
   uint64_t InsertIndex = std::numeric_limits<uint64_t>::max();
   if (I.hasOneUse())
     match(I.user_back(),
           m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
 
-  Instruction *ConvertToShuffle;
-  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), TTI, ConvertToShuffle,
+  ExtractElementInst *ExtractToChange;
+  if (isExtractExtractCheap(Ext0, Ext1, I.getOpcode(), TTI, ExtractToChange,
                             InsertIndex))
     return false;
 
-  if (ConvertToShuffle) {
-    // If the extract can be constant-folded, this code is unsimplified. Defer
-    // to other passes to handle that.
-    if (isa<Constant>(ConvertToShuffle->getOperand(0)))
+  if (ExtractToChange) {
+    unsigned CheapExtractIdx = ExtractToChange == Ext0 ? C1 : C0;
+    ExtractElementInst *NewExtract =
+        translateExtract(ExtractToChange, CheapExtractIdx);
+    if (!NewExtract)
       return false;
-
-    // The shuffle mask is undefined except for 1 lane that is being translated
-    // to the cheap extraction lane. Example:
-    // ShufMask = { 2, undef, undef, undef }
-    uint64_t SplatIndex = ConvertToShuffle == Ext0 ? C0 : C1;
-    uint64_t CheapExtIndex = ConvertToShuffle == Ext0 ? C1 : C0;
-    auto *VecTy = cast<VectorType>(V0->getType());
-    SmallVector<int, 32> ShufMask(VecTy->getNumElements(), -1);
-    ShufMask[CheapExtIndex] = SplatIndex;
-    IRBuilder<> Builder(ConvertToShuffle);
-
-    // extelt X, C --> extelt (splat X), C'
-    Value *Shuf = Builder.CreateShuffleVector(ConvertToShuffle->getOperand(0),
-                                              UndefValue::get(VecTy), ShufMask);
-    Value *NewExt = Builder.CreateExtractElement(Shuf, CheapExtIndex);
-    if (ConvertToShuffle == Ext0)
-      Ext0 = cast<Instruction>(NewExt);
+    if (ExtractToChange == Ext0)
+      Ext0 = NewExtract;
     else
-      Ext1 = cast<Instruction>(NewExt);
+      Ext1 = NewExtract;
   }
 
   if (Pred != CmpInst::BAD_ICMP_PREDICATE)


        


More information about the llvm-commits mailing list