[llvm] beacf9b - [SLP]Fix PR57322: vectorize constant float stores.

Mon Aug 29 11:04:11 PDT 2022

Author: Alexey Bataev
Date: 2022-08-29T11:02:53-07:00
New Revision: beacf9bd9e1ac0b609cb76bfb66dd9385bbed5d1

URL: https://github.com/llvm/llvm-project/commit/beacf9bd9e1ac0b609cb76bfb66dd9385bbed5d1
DIFF: https://github.com/llvm/llvm-project/commit/beacf9bd9e1ac0b609cb76bfb66dd9385bbed5d1.diff

LOG: [SLP]Fix PR57322: vectorize constant float stores.

Stores of constant floating-point values should be vectorized; improve the
SLP vectorizer's operand analysis for stores so that they are.

Differential Revision: https://reviews.llvm.org/D132750

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3ef9ecb7f462..7fa9359e705b 100644

--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2125,8 +2125,9 @@ class BoUpSLP {
 
   /// Return information about the vector formed for the specified index
   /// of a vector of (the same) instruction.
+  /// \param EnableFP - If true, check for float constants.
   TargetTransformInfo::OperandValueInfo
-  getOperandInfo(ArrayRef<Value *> VL, unsigned OpIdx);
+  getOperandInfo(ArrayRef<Value *> VL, unsigned OpIdx, bool EnableFP);
 
   /// \returns the cost of the vectorizable entry.
   InstructionCost getEntryCost(const TreeEntry *E,
@@ -5814,30 +5815,44 @@ static bool isAlternateInstruction(const Instruction *I,
   return I->getOpcode() == AltOp->getOpcode();
 }
 
-TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> VL, unsigned OpIdx) {
-
+TTI::OperandValueInfo BoUpSLP::getOperandInfo(ArrayRef<Value *> VL,
+                                              unsigned OpIdx, bool EnableFP) {
   TTI::OperandValueKind VK = TTI::OK_UniformConstantValue;
   TTI::OperandValueProperties VP = TTI::OP_PowerOf2;
 
+  // If all float operands are constants then set the operand kind to
+  // OK_NonUniformConstantValue. Otherwise, return OK_AnyValue.
+  const auto *I0 = cast<Instruction>(VL.front());
+  if (I0->getOperand(OpIdx)->getType()->isFloatingPointTy()) {
+    if (!EnableFP || any_of(VL, [OpIdx, I0](Value *V) {
+          const auto *Inst = cast<Instruction>(V);
+          assert(Inst->getOpcode() == I0->getOpcode() &&
+                 "Expected same opcode");
+          return !isConstant(Inst->getOperand(OpIdx));
+        }))
+      return {TTI::OK_AnyValue, TTI::OP_None};
+    return {TTI::OK_NonUniformConstantValue, TTI::OP_None};
+  }
+
   // If all operands are exactly the same ConstantInt then set the
   // operand kind to OK_UniformConstantValue.
   // If instead not all operands are constants, then set the operand kind
   // to OK_AnyValue. If all operands are constants but not the same,
   // then set the operand kind to OK_NonUniformConstantValue.
   ConstantInt *CInt0 = nullptr;
-  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
-    const Instruction *I = cast<Instruction>(VL[i]);
-    assert(I->getOpcode() == cast<Instruction>(VL[0])->getOpcode());
-    ConstantInt *CInt = dyn_cast<ConstantInt>(I->getOperand(OpIdx));
+  for (Value *V : VL) {
+    const auto *Inst = cast<Instruction>(V);
+    assert(Inst->getOpcode() == cast<Instruction>(VL[0])->getOpcode() &&
+           "Expected same opcode");
+    auto *CInt = dyn_cast<ConstantInt>(Inst->getOperand(OpIdx));
     if (!CInt) {
       VK = TTI::OK_AnyValue;
       VP = TTI::OP_None;
       break;
     }
-    if (VP == TTI::OP_PowerOf2 &&
-        !CInt->getValue().isPowerOf2())
+    if (VP == TTI::OP_PowerOf2 && !CInt->getValue().isPowerOf2())
       VP = TTI::OP_None;
-    if (i == 0) {
+    if (!CInt0) {
       CInt0 = CInt;
       continue;
     }
@@ -6415,7 +6430,8 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       // Certain instructions can be cheaper to vectorize if they have a
       // constant second vector operand.
       const unsigned OpIdx = isa<BinaryOperator>(VL0) ? 1 : 0;
-      auto Op2Info = getOperandInfo(VL, OpIdx);
+      // TODO: impact of enabling the analysis there is yet to be determined
+      auto Op2Info = getOperandInfo(VL, OpIdx, /*EnableFP=*/false);
 
       SmallVector<const Value *, 4> Operands(VL0->operand_values());
       InstructionCost ScalarEltCost =
@@ -6500,19 +6516,12 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
       auto *SI =
           cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
       Align Alignment = SI->getAlign();
-      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(SI->getOperand(0));
+      TTI::OperandValueInfo OpInfo = getOperandInfo(VL, 0, /*EnableFP=*/true);
       InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
           Instruction::Store, ScalarTy, Alignment, 0, CostKind, OpInfo, VL0);
       InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
       TTI::OperandValueKind OpVK = TTI::OK_AnyValue;
-      if (all_of(E->Scalars,
-                 [](Value *V) {
-                   return isConstant(cast<Instruction>(V)->getOperand(0));
-                 }) &&
-          any_of(E->Scalars, [](Value *V) {
-            Value *Op = cast<Instruction>(V)->getOperand(0);
-            return !isa<UndefValue>(Op) && !cast<Constant>(Op)->isZeroValue();
-          }))
+      if (OpInfo.isConstant())
         OpVK = TTI::OK_NonUniformConstantValue;
       InstructionCost VecStCost = TTI->getMemoryOpCost(
           Instruction::Store, VecTy, Alignment, 0, CostKind,

diff  --git a/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll b/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll
index 64cc63ba8580..857ed0d91384 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/stores_constant_float.ll
@@ -5,9 +5,7 @@ define void @foo() {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[C:%.*]] = alloca { double, double }, align 8
-; CHECK-NEXT:    [[C_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[C]], i64 0, i32 1
-; CHECK-NEXT:    store double 0.000000e+00, ptr [[C]], align 8
-; CHECK-NEXT:    store double 1.000000e+00, ptr [[C_IMAGP]], align 8
+; CHECK-NEXT:    store <2 x double> <double 0.000000e+00, double 1.000000e+00>, ptr [[C]], align 8
 ; CHECK-NEXT:    ret void
 ;
 entry: