[llvm] 3ef9220 - [CostModel][X86] Add missing AVX512 vector mul overflow intrinsic costs
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 29 07:54:03 PDT 2024
Author: Simon Pilgrim
Date: 2024-07-29T15:53:42+01:00
New Revision: 3ef922081193ae1c46683f3ca282e700efc9005e
URL: https://github.com/llvm/llvm-project/commit/3ef922081193ae1c46683f3ca282e700efc9005e
DIFF: https://github.com/llvm/llvm-project/commit/3ef922081193ae1c46683f3ca282e700efc9005e.diff
LOG: [CostModel][X86] Add missing AVX512 vector mul overflow intrinsic costs
Fix regressions in #100519
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/arith-overflow.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f7104568f593f..02267c14896c9 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -3680,6 +3680,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::SMIN, MVT::v2i64, { 1, 3, 1, 1 } },
{ ISD::SMULO, MVT::v8i64, { 44, 44, 81, 93 } },
{ ISD::SMULO, MVT::v16i32, { 5, 12, 9, 11 } },
+ { ISD::SMULO, MVT::v32i16, { 6, 12, 17, 17 } },
+ { ISD::SMULO, MVT::v64i8, { 22, 28, 42, 42 } },
{ ISD::SSUBSAT, MVT::v2i64, { 2, 13, 9, 10 } },
{ ISD::SSUBSAT, MVT::v4i64, { 2, 15, 7, 8 } },
{ ISD::SSUBSAT, MVT::v8i64, { 2, 14, 7, 8 } },
@@ -3702,6 +3704,8 @@ X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::UMIN, MVT::v2i64, { 1, 3, 1, 1 } },
{ ISD::UMULO, MVT::v8i64, { 52, 52, 95, 104} },
{ ISD::UMULO, MVT::v16i32, { 5, 12, 8, 10 } },
+ { ISD::UMULO, MVT::v32i16, { 5, 13, 16, 16 } },
+ { ISD::UMULO, MVT::v64i8, { 18, 24, 30, 30 } },
{ ISD::UADDSAT, MVT::v2i64, { 1, 4, 4, 4 } },
{ ISD::UADDSAT, MVT::v4i64, { 1, 4, 4, 4 } },
{ ISD::UADDSAT, MVT::v8i64, { 1, 4, 4, 4 } },
diff --git a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
index 963bb8a9d9fac..d99a6a793f964 100644
--- a/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
+++ b/llvm/test/Analysis/CostModel/X86/arith-overflow.ll
@@ -1076,11 +1076,11 @@ define i32 @smul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'smul'
@@ -1114,11 +1114,11 @@ define i32 @smul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'smul'
@@ -1314,11 +1314,11 @@ define i32 @umul(i32 %arg) {
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512F-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512F-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512F-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512BW-LABEL: 'umul'
@@ -1352,11 +1352,11 @@ define i32 @umul(i32 %arg) {
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
-; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
+; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
; AVX512DQ-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'umul'
More information about the llvm-commits
mailing list