[llvm] c39460c - Revert "[LoopVectorize] Simplify scalar cost calculation in getInstructionCost"
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 26 04:37:28 PDT 2021
Author: David Sherwood
Date: 2021-03-26T11:36:53Z
New Revision: c39460cc4f7c000ad0daf444bd42c4e9cb937e93
URL: https://github.com/llvm/llvm-project/commit/c39460cc4f7c000ad0daf444bd42c4e9cb937e93
DIFF: https://github.com/llvm/llvm-project/commit/c39460cc4f7c000ad0daf444bd42c4e9cb937e93.diff
LOG: Revert "[LoopVectorize] Simplify scalar cost calculation in getInstructionCost"
This reverts commit 240aa96cf25d880dde7a0db5d96918cfaa4b8891.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d113a46a9ae0..7c90b7231e09 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7253,36 +7253,10 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
Type *RetTy = I->getType();
if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
+ VectorTy = isScalarAfterVectorization(I, VF) ? RetTy : ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
- auto hasSingleCopyAfterVectorization = [this](Instruction *I,
- ElementCount VF) -> bool {
- if (VF.isScalar())
- return true;
-
- auto Scalarized = InstsToScalarize.find(VF);
- assert(Scalarized != InstsToScalarize.end() &&
- "VF not yet analyzed for scalarization profitability");
- return !Scalarized->second.count(I) &&
- llvm::all_of(I->users(), [&](User *U) {
- auto *UI = cast<Instruction>(U);
- return !Scalarized->second.count(UI);
- });
- };
-
- if (isScalarAfterVectorization(I, VF)) {
- VectorTy = RetTy;
- // With the exception of GEPs, after scalarization there should only be one
- // copy of the instruction generated in the loop. This is because the VF is
- // either 1, or any instructions that need scalarizing have already been
- // dealt with by the the time we get here. As a result, it means we don't
- // have to multiply the instruction cost by VF.
- assert(I->getOpcode() == Instruction::GetElementPtr ||
- hasSingleCopyAfterVectorization(I, VF));
- } else
- VectorTy = ToVectorTy(RetTy, VF);
-
// TODO: We need to estimate the cost of intrinsic calls.
switch (I->getOpcode()) {
case Instruction::GetElementPtr:
@@ -7410,16 +7384,21 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
Op2VK = TargetTransformInfo::OK_UniformValue;
SmallVector<const Value *, 4> Operands(I->operand_values());
- return TTI.getArithmeticInstrCost(
- I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue,
- Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
+ return N * TTI.getArithmeticInstrCost(
+ I->getOpcode(), VectorTy, CostKind,
+ TargetTransformInfo::OK_AnyValue,
+ Op2VK, TargetTransformInfo::OP_None, Op2VP, Operands, I);
}
case Instruction::FNeg: {
assert(!VF.isScalable() && "VF is assumed to be non scalable.");
- return TTI.getArithmeticInstrCost(
- I->getOpcode(), VectorTy, CostKind, TargetTransformInfo::OK_AnyValue,
- TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None,
- TargetTransformInfo::OP_None, I->getOperand(0), I);
+ unsigned N = isScalarAfterVectorization(I, VF) ? VF.getKnownMinValue() : 1;
+ return N * TTI.getArithmeticInstrCost(
+ I->getOpcode(), VectorTy, CostKind,
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OK_AnyValue,
+ TargetTransformInfo::OP_None, TargetTransformInfo::OP_None,
+ I->getOperand(0), I);
}
case Instruction::Select: {
SelectInst *SI = cast<SelectInst>(I);
@@ -7543,7 +7522,14 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
}
}
- return TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
+ unsigned N;
+ if (isScalarAfterVectorization(I, VF)) {
+ assert(!VF.isScalable() && "VF is assumed to be non scalable");
+ N = VF.getKnownMinValue();
+ } else
+ N = 1;
+ return N *
+ TTI.getCastInstrCost(Opcode, VectorTy, SrcVecTy, CCH, CostKind, I);
}
case Instruction::Call: {
bool NeedToScalarize;
@@ -7558,8 +7544,11 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
case Instruction::ExtractValue:
return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);
default:
- // This opcode is unknown. Assume that it is the same as 'mul'.
- return TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);
+ // The cost of executing VF copies of the scalar instruction. This opcode
+ // is unknown. Assume that it is the same as 'mul'.
+ return VF.getKnownMinValue() * TTI.getArithmeticInstrCost(
+ Instruction::Mul, VectorTy, CostKind) +
+ getScalarizationOverhead(I, VF);
} // end of switch.
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
index 3061998518ad..247ea35ff5d0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/no_vector_instructions.ll
@@ -6,7 +6,7 @@ target triple = "aarch64--linux-gnu"
; CHECK-LABEL: all_scalar
; CHECK: LV: Found scalar instruction: %i.next = add nuw nsw i64 %i, 2
-; CHECK: LV: Found an estimated cost of 1 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
+; CHECK: LV: Found an estimated cost of 2 for VF 2 For instruction: %i.next = add nuw nsw i64 %i, 2
; CHECK: LV: Not considering vector loop of width 2 because it will not generate any vector instructions
;
define void @all_scalar(i64* %a, i64 %n) {
More information about the llvm-commits mailing list