[PATCH] D108292: [Analysis][AArch64] Make fixed-width ordered reductions slightly more expensive
David Sherwood via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 18 09:02:20 PDT 2021
This revision was automatically updated to reflect the committed changes.
david-arm marked an inline comment as done.
Closed by commit rG219d4518fce9: [Analysis][AArch64] Make fixed-width ordered reductions slightly more expensive (authored by david-arm).
Changed prior to commit:
https://reviews.llvm.org/D108292?vs=367176&id=367230#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D108292/new/
https://reviews.llvm.org/D108292
Files:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
Index: llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/strict-fadd-cost.ll
@@ -6,8 +6,8 @@
target triple="aarch64-unknown-linux-gnu"
-; CHECK-VF4: Found an estimated cost of 17 for VF 4 For instruction: %add = fadd float %0, %sum.07
-; CHECK-VF8: Found an estimated cost of 34 for VF 8 For instruction: %add = fadd float %0, %sum.07
+; CHECK-VF4: Found an estimated cost of 21 for VF 4 For instruction: %add = fadd float %0, %sum.07
+; CHECK-VF8: Found an estimated cost of 42 for VF 8 For instruction: %add = fadd float %0, %sum.07
define float @fadd_strict32(float* noalias nocapture readonly %a, i64 %n) {
entry:
@@ -28,8 +28,8 @@
}
-; CHECK-VF4: Found an estimated cost of 14 for VF 4 For instruction: %add = fadd double %0, %sum.07
-; CHECK-VF8: Found an estimated cost of 28 for VF 8 For instruction: %add = fadd double %0, %sum.07
+; CHECK-VF4: Found an estimated cost of 18 for VF 4 For instruction: %add = fadd double %0, %sum.07
+; CHECK-VF8: Found an estimated cost of 36 for VF 8 For instruction: %add = fadd double %0, %sum.07
define double @fadd_strict64(double* noalias nocapture readonly %a, i64 %n) {
entry:
Index: llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
===================================================================
--- llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
+++ llvm/test/Analysis/CostModel/AArch64/reduce-fadd.ll
@@ -2,10 +2,10 @@
define void @strict_fp_reductions() {
; CHECK-LABEL: strict_fp_reductions
-; CHECK-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
-; CHECK-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.000000e+00, <2 x double> undef)
+; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %fadd_v4f64 = call double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> undef)
%fadd_v4f32 = call float @llvm.vector.reduce.fadd.v4f32(float 0.0, <4 x float> undef)
%fadd_v8f32 = call float @llvm.vector.reduce.fadd.v8f32(float 0.0, <8 x float> undef)
%fadd_v2f64 = call double @llvm.vector.reduce.fadd.v2f64(double 0.0, <2 x double> undef)
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1999,8 +1999,13 @@
Optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind) {
if (TTI::requiresOrderedReduction(FMF)) {
- if (!isa<ScalableVectorType>(ValTy))
- return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+ if (auto *FixedVTy = dyn_cast<FixedVectorType>(ValTy)) {
+ InstructionCost BaseCost =
+ BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind);
+ // Add on extra cost to reflect the extra overhead on some CPUs. We still
+ // end up vectorizing for more computationally intensive loops.
+ return BaseCost + FixedVTy->getNumElements();
+ }
if (Opcode != Instruction::FAdd)
return InstructionCost::getInvalid();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D108292.367230.patch
Type: text/x-patch
Size: 4378 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210818/1b9ac4b9/attachment.bin>
More information about the llvm-commits mailing list