[llvm] [LLVM][CostModel][AArch64] Remove magic numbers from f16 vector compares. (PR #135795)
Paul Walker via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 04:00:07 PDT 2025
https://github.com/paulwalker-arm updated https://github.com/llvm/llvm-project/pull/135795
>From b7cfa4a572ce18a354a2bc705f37d0d959389954 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Tue, 15 Apr 2025 13:55:43 +0000
Subject: [PATCH 1/2] [LLVM][CostModel][AArch64] Remove magic numbers from f16
vector compares.
The PR also extends the code to cover bfloat vector compares, which are
likewise promoted to float.
NOTE: Compares that are currently scalarised bail out to the base cost
implementation; that bail-out will be removed by https://github.com/llvm/llvm-project/pull/135398.
---
.../AArch64/AArch64TargetTransformInfo.cpp | 32 ++++++++++++++++---
.../CostModel/AArch64/vector-select.ll | 16 +++++-----
2 files changed, 36 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 2b9d32f9208fe..f79b8277b4cd1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4236,10 +4236,34 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
}
if (isa<FixedVectorType>(ValTy) && ISD == ISD::SETCC) {
- auto LT = getTypeLegalizationCost(ValTy);
- // Cost v4f16 FCmp without FP16 support via converting to v4f32 and back.
- if (LT.second == MVT::v4f16 && !ST->hasFullFP16())
- return LT.first * 4; // fcvtl + fcvtl + fcmp + xtn
+ Type *ValScalarTy = ValTy->getScalarType();
+ if ((ValScalarTy->isHalfTy() && !ST->hasFullFP16()) ||
+ ValScalarTy->isBFloatTy()) {
+ auto *ValVTy = cast<FixedVectorType>(ValTy);
+
+ // FIXME: We currently scalarise these.
+ if (ValVTy->getNumElements() > 4)
+ return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred,
+ CostKind, Op1Info, Op2Info, I);
+
+ // Without dedicated instructions we promote [b]f16 compares to f32.
+ auto *PromotedTy =
+ VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);
+
+ InstructionCost Cost = 0;
+ // Promte operands to float vectors.
+ Cost += 2 * getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
+ TTI::CastContextHint::None, CostKind);
+ // Compare float vectors.
+ Cost += getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred, CostKind,
+ Op1Info, Op2Info);
+ // During codegen we'll truncate the vector result from i32 to i16.
+ Cost +=
+ getCastInstrCost(Instruction::Trunc, VectorType::getInteger(ValVTy),
+ VectorType::getInteger(PromotedTy),
+ TTI::CastContextHint::None, CostKind);
+ return Cost;
+ }
}
// Treat the icmp in icmp(and, 0) as free, as we can make use of ands.
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
index c2256159a8ee2..e66f94dd54f21 100644
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -168,7 +168,7 @@ define <2 x double> @v2f64_select_ogt(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_ogt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_ogt'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp ogt <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ogt <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -255,7 +255,7 @@ define <2 x double> @v2f64_select_oge(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_oge(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_oge'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp oge <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oge <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -342,7 +342,7 @@ define <2 x double> @v2f64_select_olt(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_olt(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_olt'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp olt <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp olt <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -429,7 +429,7 @@ define <2 x double> @v2f64_select_ole(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_ole(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_ole'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp ole <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ole <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -516,7 +516,7 @@ define <2 x double> @v2f64_select_oeq(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_oeq(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_oeq'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp oeq <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp oeq <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -603,7 +603,7 @@ define <2 x double> @v2f64_select_one(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_one(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_one'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp one <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp one <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -690,7 +690,7 @@ define <2 x double> @v2f64_select_une(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_une(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_une'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp une <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp une <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
@@ -777,7 +777,7 @@ define <2 x double> @v2f64_select_ord(<2 x double> %a, <2 x double> %b, <2 x dou
define <4 x bfloat> @v4bf16_select_ord(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c) {
; COST-LABEL: 'v4bf16_select_ord'
-; COST-NEXT: Cost Model: Found costs of 1 for: %cmp.1 = fcmp ord <4 x bfloat> %a, %b
+; COST-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:1 SizeLat:1 for: %cmp.1 = fcmp ord <4 x bfloat> %a, %b
; COST-NEXT: Cost Model: Found costs of RThru:10 CodeSize:1 Lat:1 SizeLat:1 for: %s.1 = select <4 x i1> %cmp.1, <4 x bfloat> %a, <4 x bfloat> %c
; COST-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x bfloat> %s.1
;
>From b0a5cdb76e7a190ec9be9aa97fe9591b65a72ef9 Mon Sep 17 00:00:00 2001
From: Paul Walker <paul.walker at arm.com>
Date: Thu, 17 Apr 2025 10:59:13 +0000
Subject: [PATCH 2/2] Fix typo Promte->Promote.
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index f79b8277b4cd1..5f3306826cc6a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4251,7 +4251,7 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
VectorType::get(Type::getFloatTy(ValTy->getContext()), ValVTy);
InstructionCost Cost = 0;
- // Promte operands to float vectors.
+ // Promote operands to float vectors.
Cost += 2 * getCastInstrCost(Instruction::FPExt, PromotedTy, ValTy,
TTI::CastContextHint::None, CostKind);
// Compare float vectors.
More information about the llvm-commits
mailing list