[llvm] 043bc88 - [CostModel][X86] Improve v2f32 fadd reduction cost
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 23 08:56:29 PDT 2021
Author: Simon Pilgrim
Date: 2021-04-23T16:56:13+01:00
New Revision: 043bc88dbada3efd8bc811efc9fef610f57d47f4
URL: https://github.com/llvm/llvm-project/commit/043bc88dbada3efd8bc811efc9fef610f57d47f4
DIFF: https://github.com/llvm/llvm-project/commit/043bc88dbada3efd8bc811efc9fef610f57d47f4.diff
LOG: [CostModel][X86] Improve v2f32 fadd reduction cost
This was being reported as a similar cost to v4f32 when its a lot cheaper (just a shufps+addps).
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index ba5f0e76afd3f..e04118d935600 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3380,6 +3380,7 @@ X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy,
static const CostTblEntry SSE2CostTblNoPairWise[] = {
{ ISD::FADD, MVT::v2f64, 2 },
+ { ISD::FADD, MVT::v2f32, 2 },
{ ISD::FADD, MVT::v4f32, 4 },
{ ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
{ ISD::ADD, MVT::v2i32, 2 }, // FIXME: chosen to be less than v4i32
diff --git a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
index d918eaa9e652b..bc13591b89dc9 100644
--- a/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
+++ b/llvm/test/Analysis/CostModel/X86/reduce-fadd.ll
@@ -77,7 +77,7 @@ define void @reduce_f64(double %arg) {
define void @reduce_f32(float %arg) {
; SSE2-LABEL: 'reduce_f32'
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
@@ -86,7 +86,7 @@ define void @reduce_f32(float %arg) {
;
; SSSE3-LABEL: 'reduce_f32'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; SSSE3-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
@@ -95,7 +95,7 @@ define void @reduce_f32(float %arg) {
;
; SSE41-LABEL: 'reduce_f32'
; SSE41-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; SSE41-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; SSE41-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; SSE41-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; SSE41-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
@@ -104,7 +104,7 @@ define void @reduce_f32(float %arg) {
;
; SSE42-LABEL: 'reduce_f32'
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
@@ -113,7 +113,7 @@ define void @reduce_f32(float %arg) {
;
; AVX1-LABEL: 'reduce_f32'
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
@@ -122,7 +122,7 @@ define void @reduce_f32(float %arg) {
;
; AVX2-LABEL: 'reduce_f32'
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
@@ -131,7 +131,7 @@ define void @reduce_f32(float %arg) {
;
; AVX512-LABEL: 'reduce_f32'
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %V1 = call float @llvm.vector.reduce.fadd.v1f32(float %arg, <1 x float> undef)
-; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
+; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2 = call float @llvm.vector.reduce.fadd.v2f32(float %arg, <2 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4 = call float @llvm.vector.reduce.fadd.v4f32(float %arg, <4 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8 = call float @llvm.vector.reduce.fadd.v8f32(float %arg, <8 x float> undef)
; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16 = call float @llvm.vector.reduce.fadd.v16f32(float %arg, <16 x float> undef)
More information about the llvm-commits
mailing list