[llvm] [RISCV] Add codegen support for Zvfbfmin (PR #87911)
Jianjian Guan via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 10 20:07:49 PDT 2024
https://github.com/jacquesguan updated https://github.com/llvm/llvm-project/pull/87911
From cd0f02a1e95bbfe361f9572ab2b4f67c5fa5bcf1 Mon Sep 17 00:00:00 2001
From: Jianjian GUAN <jacquesguan at me.com>
Date: Sun, 7 Apr 2024 17:35:38 +0800
Subject: [PATCH] [RISCV] Add codegen support for Zvfbfmin
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 40 +++++-
.../Target/RISCV/RISCVInstrInfoVSDPatterns.td | 14 ++
.../Target/RISCV/RISCVInstrInfoVVLPatterns.td | 30 +++++
.../RISCV/rvv/fixed-vectors-fpext-vp.ll | 58 ++++++++-
.../RISCV/rvv/fixed-vectors-fptrunc-vp.ll | 58 ++++++++-
.../RISCV/rvv/fixed-vectors-load-store.ll | 16 ++-
.../CodeGen/RISCV/rvv/fixed-vectors-load.ll | 14 +-
.../CodeGen/RISCV/rvv/fixed-vectors-store.ll | 14 +-
llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll | 120 +++++++++++++++++-
llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll | 59 ++++++++-
.../test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll | 81 +++++++++++-
llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll | 58 ++++++++-
12 files changed, 529 insertions(+), 33 deletions(-)
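
At a glance: Zvfbfmin only adds hardware conversions between bf16 and f32
(vfwcvtbf16.f.f.v and vfncvtbf16.f.f.w), so this patch legalizes bf16
vectors for loads, stores, subvector operations, and FP_ROUND/FP_EXTEND,
and routes conversions involving f64 through f32. A minimal sketch of IR
that becomes selectable with this patch (function name is illustrative):

define <vscale x 1 x float> @bf16_ext_example(<vscale x 1 x bfloat> %va) {
  ; Expected to select a single vfwcvtbf16.f.f.v under +experimental-zvfbfmin.
  %e = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
  ret <vscale x 1 x float> %e
}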
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1a3ef6feea3e31..9edef62c222a0b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1081,6 +1081,21 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+ // TODO: Could we merge some code with zvfhmin?
+ if (Subtarget.hasVInstructionsBF16()) {
+ for (MVT VT : BF16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ // TODO: Promote to fp32.
+ }
+ }
+
if (Subtarget.hasVInstructionsF32()) {
for (MVT VT : F32VecVTs) {
if (!isTypeLegal(VT))
@@ -1296,6 +1311,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
continue;
}
+ if (VT.getVectorElementType() == MVT::bf16) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
+ ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
+ VT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ // TODO: Promote to fp32.
+ continue;
+ }
+
// We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
setOperationAction({ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR}, VT,
Custom);
@@ -2550,6 +2576,10 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
if (!Subtarget.hasVInstructionsF16Minimal())
return false;
break;
+ case MVT::bf16:
+ if (!Subtarget.hasVInstructionsBF16())
+ return false;
+ break;
case MVT::f32:
if (!Subtarget.hasVInstructionsF32())
return false;
@@ -2601,6 +2631,7 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
case MVT::i16:
case MVT::i32:
case MVT::i64:
+ case MVT::bf16:
case MVT::f16:
case MVT::f32:
case MVT::f64: {
@@ -8096,9 +8127,12 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
SDValue Src = Op.getOperand(0);
MVT SrcVT = Src.getSimpleValueType();
- bool IsDirectExtend = IsExtend && (VT.getVectorElementType() != MVT::f64 ||
- SrcVT.getVectorElementType() != MVT::f16);
- bool IsDirectTrunc = !IsExtend && (VT.getVectorElementType() != MVT::f16 ||
+ bool IsDirectExtend =
+ IsExtend && (VT.getVectorElementType() != MVT::f64 ||
+ (SrcVT.getVectorElementType() != MVT::f16 &&
+ SrcVT.getVectorElementType() != MVT::bf16));
+ bool IsDirectTrunc = !IsExtend && ((VT.getVectorElementType() != MVT::f16 &&
+ VT.getVectorElementType() != MVT::bf16) ||
SrcVT.getVectorElementType() != MVT::f64);
bool IsDirectConv = IsDirectExtend || IsDirectTrunc;
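
The reworked IsDirectExtend/IsDirectTrunc conditions encode that, as with
f16 under Zvfhmin, there is no single-instruction conversion between bf16
and f64: an extend to f64 first widens bf16 to f32, and a truncation from
f64 first narrows to f32 with the round-toward-odd vfncvt.rod.f.f.w to
avoid double rounding. A sketch of the equivalence the indirect extend
path relies on (function name is illustrative):

define <vscale x 1 x double> @bf16_two_step_ext(<vscale x 1 x bfloat> %va) {
  ; The lowering splits fpext bf16 -> f64 into the two steps below.
  %mid = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
  %res = fpext <vscale x 1 x float> %mid to <vscale x 1 x double>
  ret <vscale x 1 x double> %res
}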
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
index da761ae856706a..12c521ddeece95 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td
@@ -1450,6 +1450,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVInstructionsBF16] in
+ def : Pat<(fvti.Vector (fpround (fwti.Vector fwti.RegClass:$rs1))),
+ (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX)
+ (fvti.Vector (IMPLICIT_DEF)),
+ fwti.RegClass:$rs1,
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ fvti.AVL, fvti.Log2SEW, TA_MA)>;
+}
+
//===----------------------------------------------------------------------===//
// Vector Splats
//===----------------------------------------------------------------------===//
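
The pattern added above handles the plain SDNode form, matching fpround
from f32 vectors to bf16 vectors onto PseudoVFNCVTBF16_F_F_W with the
dynamic rounding mode. A minimal IR example it is expected to match
(function name is illustrative):

define <vscale x 4 x bfloat> @bf16_trunc_example(<vscale x 4 x float> %va) {
  ; Expected to select vfncvtbf16.f.f.w with frm left as FRM_DYN.
  %t = fptrunc <vscale x 4 x float> %va to <vscale x 4 x bfloat>
  ret <vscale x 4 x bfloat> %t
}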
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index b721dcd9898854..e07c72c2fdf6d9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2668,6 +2668,20 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
GPR:$vl, fvti.Log2SEW, TA_MA)>;
}
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVInstructionsBF16] in
+ def : Pat<(fwti.Vector (any_riscv_fpextend_vl
+ (fvti.Vector fvti.RegClass:$rs1),
+ (fvti.Mask V0),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFWCVTBF16_F_F_V_"#fvti.LMul.MX#"_MASK")
+ (fwti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1,
+ (fvti.Mask V0),
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
// 13.19 Narrowing Floating-Point/Integer Type-Convert Instructions
defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_xu_f_vl, "PseudoVFNCVT_XU_F_W">;
defm : VPatNConvertFP2IVL_W_RM<riscv_vfcvt_x_f_vl, "PseudoVFNCVT_X_F_W">;
@@ -2712,6 +2726,22 @@ foreach fvtiToFWti = AllWidenableFloatVectors in {
}
}
+foreach fvtiToFWti = AllWidenableBFloatToFloatVectors in {
+ defvar fvti = fvtiToFWti.Vti;
+ defvar fwti = fvtiToFWti.Wti;
+ let Predicates = [HasVInstructionsBF16] in
+ def : Pat<(fvti.Vector (any_riscv_fpround_vl
+ (fwti.Vector fwti.RegClass:$rs1),
+ (fwti.Mask V0), VLOpFrag)),
+ (!cast<Instruction>("PseudoVFNCVTBF16_F_F_W_"#fvti.LMul.MX#"_MASK")
+ (fvti.Vector (IMPLICIT_DEF)), fwti.RegClass:$rs1,
+ (fwti.Mask V0),
+ // Value to indicate no rounding mode change in
+ // RISCVInsertReadWriteCSR
+ FRM_DYN,
+ GPR:$vl, fvti.Log2SEW, TA_MA)>;
+}
+
// 14. Vector Reduction Operations
// 14.1. Vector Single-Width Integer Reduction Instructions
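
The VL patterns above cover the masked, vector-predicated forms of the
same conversions; the vfpext-vp.ll and vfptrunc-vp.ll tests below
exercise them. A minimal sketch of a call that should hit the _MASK
pseudo (function name is illustrative, intrinsic as declared in the
tests):

declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)

define <vscale x 2 x float> @bf16_vp_ext_example(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %vl) {
  ; Expected to select vfwcvtbf16.f.f.v with the v0.t mask operand.
  %v = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %vl)
  ret <vscale x 2 x float> %v
}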
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
index 51ac27acaf470d..48cc3f17a6269f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32)
@@ -120,3 +120,53 @@ define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 ze
%v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl)
ret <32 x double> %v
}
+
+declare <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x float> @vfpext_v2bf16_v2f32(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x float> %v
+}
+
+define <2 x float> @vfpext_v2bf16_v2f32_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x float> @llvm.vp.fpext.v2f32.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x float> %v
+}
+
+declare <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x double> @vfpext_v2bf16_v2f64(<2 x bfloat> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x double> %v
+}
+
+define <2 x double> @vfpext_v2bf16_v2f64_unmasked(<2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_v2bf16_v2f64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x double> @llvm.vp.fpext.v2f64.v2bf16(<2 x bfloat> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x double> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
index de11f9e8a9fa2a..d890bf5412f9fa 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32)
@@ -122,3 +122,53 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32
%v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl)
ret <32 x float> %v
}
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32(<2 x float> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f32_unmasked(<2 x float> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f32(<2 x float> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x bfloat> %v
+}
+
+declare <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64(<2 x double> %a, <2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> %m, i32 %vl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfptrunc_v2bf16_v2f64_unmasked(<2 x double> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_v2bf16_v2f64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fptrunc.v2bf16.v2f64(<2 x double> %a, <2 x i1> shufflevector (<2 x i1> insertelement (<2 x i1> undef, i1 true, i32 0), <2 x i1> undef, <2 x i32> zeroinitializer), i32 %vl)
+ ret <2 x bfloat> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll
index 38aee567e2b5cb..fbe8bcbc0d3c50 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load-store.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
define void @v2i8(ptr %p, ptr %q) {
; CHECK-LABEL: v2i8:
@@ -301,3 +301,15 @@ define void @v2i8_volatile_store(ptr %p, ptr %q) {
store volatile <2 x i8> %v, ptr %q
ret void
}
+
+define void @v8bf16(ptr %p, ptr %q) {
+; CHECK-LABEL: v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
+ %v = load <8 x bfloat>, ptr %p
+ store <8 x bfloat> %v, ptr %q
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 791e6eb5ff300d..509a4e0fe66f13 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
define <5 x i8> @load_v5i8(ptr %p) {
; CHECK-LABEL: load_v5i8:
@@ -181,3 +181,13 @@ define <16 x i64> @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) {
%v = load <16 x i64>, ptr %p
ret <16 x i64> %v
}
+
+define <8 x bfloat> @load_v6bf16(ptr %p) {
+; CHECK-LABEL: load_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: ret
+ %x = load <8 x bfloat>, ptr %p
+ ret <8 x bfloat> %x
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index b747d73ce353e2..6317a4977562af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
-; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
define void @store_v5i8(ptr %p, <5 x i8> %v) {
; CHECK-LABEL: store_v5i8:
@@ -294,6 +294,16 @@ define void @exact_vlen_i64_m8(ptr %p) vscale_range(2,2) {
ret void
}
+define void @store_v8bf16(ptr %p, <8 x bfloat> %v) {
+; CHECK-LABEL: store_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ store <8 x bfloat> %v, ptr %p
+ ret void
+}
+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; RV32: {{.*}}
; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
index d805a103aafd37..b002b8e7656687 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-sdnode.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x float> @vfpext_nxv1f16_nxv1f32(<vscale x 1 x half> %va) {
@@ -167,3 +167,115 @@ define <vscale x 8 x double> @vfpext_nxv8f32_nxv8f64(<vscale x 8 x float> %va) {
%evec = fpext <vscale x 8 x float> %va to <vscale x 8 x double>
ret <vscale x 8 x double> %evec
}
+
+define <vscale x 1 x float> @vfpext_nxv1bf16_nxv1f32(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x float>
+ ret <vscale x 1 x float> %evec
+}
+
+define <vscale x 1 x double> @vfpext_nxv1bf16_nxv1f64(<vscale x 1 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv1bf16_nxv1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 1 x bfloat> %va to <vscale x 1 x double>
+ ret <vscale x 1 x double> %evec
+}
+
+define <vscale x 2 x float> @vfpext_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x float>
+ ret <vscale x 2 x float> %evec
+}
+
+define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 2 x bfloat> %va to <vscale x 2 x double>
+ ret <vscale x 2 x double> %evec
+}
+
+define <vscale x 4 x float> @vfpext_nxv4bf16_nxv4f32(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x float>
+ ret <vscale x 4 x float> %evec
+}
+
+define <vscale x 4 x double> @vfpext_nxv4bf16_nxv4f64(<vscale x 4 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv4bf16_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v12
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 4 x bfloat> %va to <vscale x 4 x double>
+ ret <vscale x 4 x double> %evec
+}
+
+define <vscale x 8 x float> @vfpext_nxv8bf16_nxv8f32(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x float>
+ ret <vscale x 8 x float> %evec
+}
+
+define <vscale x 8 x double> @vfpext_nxv8bf16_nxv8f64(<vscale x 8 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv8bf16_nxv8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v16
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 8 x bfloat> %va to <vscale x 8 x double>
+ ret <vscale x 8 x double> %evec
+}
+
+define <vscale x 16 x float> @vfpext_nxv16bf16_nxv16f32(<vscale x 16 x bfloat> %va) {
+;
+; CHECK-LABEL: vfpext_nxv16bf16_nxv16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8
+; CHECK-NEXT: vmv8r.v v8, v16
+; CHECK-NEXT: ret
+ %evec = fpext <vscale x 16 x bfloat> %va to <vscale x 16 x float>
+ ret <vscale x 16 x float> %evec
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
index 5cfa98916a2de0..aaaf4ad4607119 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
@@ -120,3 +120,54 @@ define <vscale x 32 x float> @vfpext_nxv32f16_nxv32f32(<vscale x 32 x half> %a,
%v = call <vscale x 32 x float> @llvm.vp.fpext.nxv32f32.nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x i1> %m, i32 %vl)
ret <vscale x 32 x float> %v
}
+
+declare <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x float> @vfpext_nxv2bf16_nxv2f32(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x float> %v
+}
+
+define <vscale x 2 x float> @vfpext_nxv2bf16_nxv2f32_unmasked(<vscale x 2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x float> @llvm.vp.fpext.nxv2f32.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ ret <vscale x 2 x float> %v
+}
+
+declare <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x double> %v
+}
+
+define <vscale x 2 x double> @vfpext_nxv2bf16_nxv2f64_unmasked(<vscale x 2 x bfloat> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfpext_nxv2bf16_nxv2f64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
+; CHECK-NEXT: vfwcvt.f.f.v v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x double> @llvm.vp.fpext.nxv2f64.nxv2bf16(<vscale x 2 x bfloat> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ ret <vscale x 2 x double> %v
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
index d715b46e95feb4..9148a79cb74079 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-sdnode.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+experimental-zvfbfmin -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s
define <vscale x 1 x half> @vfptrunc_nxv1f32_nxv1f16(<vscale x 1 x float> %va) {
@@ -167,3 +167,76 @@ define <vscale x 8 x float> @vfptrunc_nxv8f64_nxv8f32(<vscale x 8 x double> %va)
%evec = fptrunc <vscale x 8 x double> %va to <vscale x 8 x float>
ret <vscale x 8 x float> %evec
}
+
+define <vscale x 1 x bfloat> @vfptrunc_nxv1f32_nxv1bf16(<vscale x 1 x float> %va) {
+;
+; CHECK-LABEL: vfptrunc_nxv1f32_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fptrunc <vscale x 1 x float> %va to <vscale x 1 x bfloat>
+ ret <vscale x 1 x bfloat> %evec
+}
+
+define <vscale x 2 x bfloat> @vfptrunc_nxv2f32_nxv2bf16(<vscale x 2 x float> %va) {
+;
+; CHECK-LABEL: vfptrunc_nxv2f32_nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %evec = fptrunc <vscale x 2 x float> %va to <vscale x 2 x bfloat>
+ ret <vscale x 2 x bfloat> %evec
+}
+
+define <vscale x 4 x bfloat> @vfptrunc_nxv4f32_nxv4bf16(<vscale x 4 x float> %va) {
+;
+; CHECK-LABEL: vfptrunc_nxv4f32_nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+ %evec = fptrunc <vscale x 4 x float> %va to <vscale x 4 x bfloat>
+ ret <vscale x 4 x bfloat> %evec
+}
+
+define <vscale x 8 x bfloat> @vfptrunc_nxv8f32_nxv8bf16(<vscale x 8 x float> %va) {
+;
+; CHECK-LABEL: vfptrunc_nxv8f32_nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+ %evec = fptrunc <vscale x 8 x float> %va to <vscale x 8 x bfloat>
+ ret <vscale x 8 x bfloat> %evec
+}
+
+define <vscale x 16 x bfloat> @vfptrunc_nxv16f32_nxv16bf16(<vscale x 16 x float> %va) {
+;
+; CHECK-LABEL: vfptrunc_nxv16f32_nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v16, v8
+; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: ret
+ %evec = fptrunc <vscale x 16 x float> %va to <vscale x 16 x bfloat>
+ ret <vscale x 16 x bfloat> %evec
+}
+
+define <vscale x 1 x bfloat> @vfptrunc_nxv1f64_nxv1bf16(<vscale x 1 x double> %va) {
+;
+; CHECK-LABEL: vfptrunc_nxv1f64_nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v9, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
+; CHECK-NEXT: ret
+ %evec = fptrunc <vscale x 1 x double> %va to <vscale x 1 x bfloat>
+ ret <vscale x 1 x bfloat> %evec
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
index dd122f1f251103..0c3abe37af27a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfhmin,+v,+m,+experimental-zvfbfmin -verify-machineinstrs < %s | FileCheck %s
declare <vscale x 2 x half> @llvm.vp.fptrunc.nxv2f16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
@@ -218,3 +218,53 @@ define <vscale x 32 x float> @vfptrunc_nxv32f32_nxv32f64(<vscale x 32 x double>
%v = call <vscale x 32 x float> @llvm.vp.fptrunc.nxv32f64.nxv32f32(<vscale x 32 x double> %a, <vscale x 32 x i1> %m, i32 %vl)
ret <vscale x 32 x float> %v
}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8, v0.t
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f32_unmasked(<vscale x 2 x float> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f32_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v9, v8
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> %m, i32 %vl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfptrunc_nxv2bf16_nxv2f64_unmasked(<vscale x 2 x double> %a, i32 zeroext %vl) {
+; CHECK-LABEL: vfptrunc_nxv2bf16_nxv2f64_unmasked:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
+; CHECK-NEXT: vfncvt.rod.f.f.w v10, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
+; CHECK-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fptrunc.nxv2bf16.nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x i1> splat (i1 true), i32 %vl)
+ ret <vscale x 2 x bfloat> %v
+}