[llvm] 26e42c7 - [CostModel][AArch64] Remove promotion cost for SVE bfloat arith supported with +sve-b16b16 (#167717)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 17 01:00:40 PST 2025
Author: Benjamin Maxwell
Date: 2025-11-17T09:00:36Z
New Revision: 26e42c7e5321a233cfe75d18435090045241f1ec
URL: https://github.com/llvm/llvm-project/commit/26e42c7e5321a233cfe75d18435090045241f1ec
DIFF: https://github.com/llvm/llvm-project/commit/26e42c7e5321a233cfe75d18435090045241f1ec.diff
LOG: [CostModel][AArch64] Remove promotion cost for SVE bfloat arith supported with +sve-b16b16 (#167717)
The resulting costs are the same as the standard SVE costs for `half`
types.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d92421701c373..7dd571c7fe60d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4167,12 +4167,15 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead(
std::optional<InstructionCost> AArch64TTIImpl::getFP16BF16PromoteCost(
Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
- TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
+ TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
std::function<InstructionCost(Type *)> InstCost) const {
if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
return std::nullopt;
if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
return std::nullopt;
+ if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
+ ST->isNonStreamingSVEorSME2Available())
+ return std::nullopt;
Type *PromotedTy = Ty->getWithNewType(Type::getFloatTy(Ty->getContext()));
InstructionCost Cost = getCastInstrCost(Instruction::FPExt, PromotedTy, Ty,
@@ -4214,6 +4217,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
ISD == ISD::FDIV || ISD == ISD::FREM)
if (auto PromotedCost = getFP16BF16PromoteCost(
Ty, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/true,
+ // There is not native support for fdiv/frem even with +sve-b16b16.
+ /*CanUseSVE=*/ISD != ISD::FDIV && ISD != ISD::FREM,
[&](Type *PromotedTy) {
return getArithmeticInstrCost(Opcode, PromotedTy, CostKind,
Op1Info, Op2Info);
@@ -4628,7 +4633,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
if (Opcode == Instruction::FCmp) {
if (auto PromotedCost = getFP16BF16PromoteCost(
ValTy, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/false,
- [&](Type *PromotedTy) {
+ // TODO: Consider costing SVE FCMPs.
+ /*CanUseSVE=*/false, [&](Type *PromotedTy) {
InstructionCost Cost =
getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred,
CostKind, Op1Info, Op2Info);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e62fdb6786843..d189f563f99a1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -456,11 +456,10 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
/// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the
/// architecture features are not present.
- std::optional<InstructionCost>
- getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind,
- TTI::OperandValueInfo Op1Info,
- TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
- std::function<InstructionCost(Type *)> InstCost) const;
+ std::optional<InstructionCost> getFP16BF16PromoteCost(
+ Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
+ TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
+ std::function<InstructionCost(Type *)> InstCost) const;
InstructionCost
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
index d9e26dc47b53f..a735640311ff6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
@@ -33,11 +33,17 @@ define void @fadd() {
}
define void @fadd_bf16() {
-; CHECK-LABEL: 'fadd_bf16'
-; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fadd_bf16'
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'fadd_bf16'
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
%NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
@@ -76,11 +82,17 @@ define void @fsub() {
}
define void @fsub_bf16() {
-; CHECK-LABEL: 'fsub_bf16'
-; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fsub_bf16'
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'fsub_bf16'
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
%NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
@@ -160,11 +172,17 @@ define void @fmul() {
}
define void @fmul_bf16() {
-; CHECK-LABEL: 'fmul_bf16'
-; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
-; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fmul_bf16'
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'fmul_bf16'
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
;
%NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
%NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
More information about the llvm-commits
mailing list