[llvm] 26e42c7 - [CostModel][AArch64] Remove promotion cost for SVE bfloat arith supported with +sve-b16b16 (#167717)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 17 01:00:40 PST 2025


Author: Benjamin Maxwell
Date: 2025-11-17T09:00:36Z
New Revision: 26e42c7e5321a233cfe75d18435090045241f1ec

URL: https://github.com/llvm/llvm-project/commit/26e42c7e5321a233cfe75d18435090045241f1ec
DIFF: https://github.com/llvm/llvm-project/commit/26e42c7e5321a233cfe75d18435090045241f1ec.diff

LOG: [CostModel][AArch64] Remove promotion cost for SVE bfloat arith supported with +sve-b16b16  (#167717)

The resulting costs are the same as the standard SVE costs for `half`
types.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index d92421701c373..7dd571c7fe60d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4167,12 +4167,15 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead(
 
 std::optional<InstructionCost> AArch64TTIImpl::getFP16BF16PromoteCost(
     Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
-    TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
+    TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
     std::function<InstructionCost(Type *)> InstCost) const {
   if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
     return std::nullopt;
   if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
     return std::nullopt;
+  if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
+      ST->isNonStreamingSVEorSME2Available())
+    return std::nullopt;
 
   Type *PromotedTy = Ty->getWithNewType(Type::getFloatTy(Ty->getContext()));
   InstructionCost Cost = getCastInstrCost(Instruction::FPExt, PromotedTy, Ty,
@@ -4214,6 +4217,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
       ISD == ISD::FDIV || ISD == ISD::FREM)
     if (auto PromotedCost = getFP16BF16PromoteCost(
             Ty, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/true,
+            // There is not native support for fdiv/frem even with +sve-b16b16.
+            /*CanUseSVE=*/ISD != ISD::FDIV && ISD != ISD::FREM,
             [&](Type *PromotedTy) {
               return getArithmeticInstrCost(Opcode, PromotedTy, CostKind,
                                             Op1Info, Op2Info);
@@ -4628,7 +4633,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
   if (Opcode == Instruction::FCmp) {
     if (auto PromotedCost = getFP16BF16PromoteCost(
             ValTy, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/false,
-            [&](Type *PromotedTy) {
+            // TODO: Consider costing SVE FCMPs.
+            /*CanUseSVE=*/false, [&](Type *PromotedTy) {
               InstructionCost Cost =
                   getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred,
                                      CostKind, Op1Info, Op2Info);

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index e62fdb6786843..d189f563f99a1 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -456,11 +456,10 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
 
   /// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext) if the
   /// architecture features are not present.
-  std::optional<InstructionCost>
-  getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind,
-                         TTI::OperandValueInfo Op1Info,
-                         TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
-                         std::function<InstructionCost(Type *)> InstCost) const;
+  std::optional<InstructionCost> getFP16BF16PromoteCost(
+      Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
+      TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
+      std::function<InstructionCost(Type *)> InstCost) const;
 
   InstructionCost
   getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,

diff  --git a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
index d9e26dc47b53f..a735640311ff6 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll
@@ -33,11 +33,17 @@ define void @fadd() {
 }
 
 define void @fadd_bf16() {
-; CHECK-LABEL: 'fadd_bf16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fadd_bf16'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'fadd_bf16'
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
   %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
@@ -76,11 +82,17 @@ define void @fsub() {
 }
 
 define void @fsub_bf16() {
-; CHECK-LABEL: 'fsub_bf16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fsub_bf16'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'fsub_bf16'
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
   %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
@@ -160,11 +172,17 @@ define void @fmul() {
 }
 
 define void @fmul_bf16() {
-; CHECK-LABEL: 'fmul_bf16'
-; CHECK-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
-; CHECK-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+; CHECK-BASE-LABEL: 'fmul_bf16'
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; CHECK-BASE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-BF16-LABEL: 'fmul_bf16'
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
+; CHECK-BF16-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
   %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
   %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison


        


More information about the llvm-commits mailing list