[llvm] [llvm][RISCV] Support Zvfbfa codegen for fneg, fabs and copysign (PR #166944)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 9 06:38:22 PST 2025
https://github.com/4vtomat updated https://github.com/llvm/llvm-project/pull/166944
From a2c6d6255c6a635be0306da50e079ffb354211d8 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Fri, 7 Nov 2025 06:30:49 -0800
Subject: [PATCH 1/3] [llvm][RISCV] Support Zvfbfa codegen for fneg, fabs and
copysign
This is the first patch for Zvfbfa codegen; I'm going to break the work down
into several patches to make it easier to review.
The codegen supports both scalable and fixed-length vectors, covering both
native operations and VP intrinsics.
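
As a quick illustration (the function below is just an example, not one of the
added tests; the expected assembly is modeled on the tests touched in this
patch, and the exact vsetvli/register choices may differ), a VP fneg on a
scalable bfloat vector is now selected as a single e16alt sign-injection
instead of the integer xor-of-the-sign-bit lowering used under Zvfbfmin:

define <vscale x 1 x bfloat> @vp_fneg_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
  %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
  ret <vscale x 1 x bfloat> %v
}
; With +experimental-zvfbfa this is expected to lower to roughly:
;   vsetvli zero, a0, e16alt, mf4, ta, ma
;   vfneg.v v8, v8, v0.t
; while without Zvfbfa the sign bit is flipped with an integer vxor.vx.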
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 81 +-
llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td | 83 ++-
.../rvv/fixed-vectors-vcopysign-sdnode.ll | 56 ++
.../RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 188 ++++-
.../RISCV/rvv/fixed-vectors-vfabs-sdnode.ll | 457 ++++++++++++
.../RISCV/rvv/fixed-vectors-vfabs-vp.ll | 300 +++++++-
.../RISCV/rvv/fixed-vectors-vfneg-sdnode.ll | 403 ++++++++++
.../RISCV/rvv/fixed-vectors-vfneg-vp.ll | 276 ++++++-
llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll | 504 ++++++++++++-
llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll | 222 ++++--
llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 440 ++++++++++-
.../CodeGen/RISCV/rvv/vfcopysign-sdnode.ll | 702 ++++++++++++++++--
llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll | 198 ++++-
llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 404 +++++++++-
14 files changed, 4135 insertions(+), 179 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d3372c5f6..4bb83d1f600fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,12 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {
+ ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {
+ ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1214,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
};
+ // Sets common actions for zvfbfa; some of the instructions are supported
+ // natively, so we don't need to promote them.
+ const auto SetZvfbfaActions = [&](MVT VT) {
+ setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+ Custom);
+ setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+ setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+ setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+ setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+ Custom);
+ setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+ setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+ ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+ ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+ ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+ ISD::VECTOR_COMPRESS},
+ VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+ setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+ setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+ MVT EltVT = VT.getVectorElementType();
+ if (isTypeLegal(EltVT))
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+ ISD::EXTRACT_VECTOR_ELT},
+ VT, Custom);
+ else
+ setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+ EltVT, Custom);
+ setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+ ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+ ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+ ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+ ISD::VP_SCATTER},
+ VT, Custom);
+ setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+ // Expand FP operations that need libcalls.
+ setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+ // Custom split nxv32bf16 since nxv32f32 is not legal.
+ if (getLMUL(VT) == RISCVVType::LMUL_8) {
+ setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+ setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+ } else {
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+ }
+ };
+
if (Subtarget.hasVInstructionsF16()) {
for (MVT VT : F16VecVTs) {
if (!isTypeLegal(VT))
@@ -1222,7 +1283,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
- if (Subtarget.hasVInstructionsBF16Minimal()) {
+ if (Subtarget.hasVInstructionsBF16()) {
+ for (MVT VT : BF16VecVTs) {
+ if (!isTypeLegal(VT))
+ continue;
+ SetZvfbfaActions(VT);
+ }
+ } else if (Subtarget.hasVInstructionsBF16Minimal()) {
for (MVT VT : BF16VecVTs) {
if (!isTypeLegal(VT))
continue;
@@ -1501,6 +1568,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
}
+ if (Subtarget.hasStdExtZvfbfa()) {
+ setOperationAction(ZvfbfaOps, VT, Custom);
+ setOperationAction(ZvfbfaVPOps, VT, Custom);
+ }
setOperationAction(
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
@@ -7245,7 +7316,13 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
return (Op.getValueType() == MVT::nxv32f16 &&
(Subtarget.hasVInstructionsF16Minimal() &&
!Subtarget.hasVInstructionsF16())) ||
- Op.getValueType() == MVT::nxv32bf16;
+ (Op.getValueType() == MVT::nxv32bf16 &&
+ Subtarget.hasVInstructionsBF16Minimal() &&
+ (!Subtarget.hasVInstructionsBF16() ||
+ (std::find(std::begin(ZvfbfaOps), std::end(ZvfbfaOps),
+ Op.getOpcode()) == std::end(ZvfbfaOps) &&
+ std::find(std::begin(ZvfbfaVPOps), std::end(ZvfbfaVPOps),
+ Op.getOpcode()) == std::end(ZvfbfaVPOps))));
}
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75983b1f..49f1c92750f3a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
FRM_DYN,
fvti.AVL, fvti.Log2SEW, TA_MA)>;
}
-}
+
+ foreach vti = AllBF16Vectors in {
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg vti.RegClass:$rs2)))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+ def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+ // 13.12. Vector Floating-Point Sign-Injection Instructions
+ def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+ // Handle fneg with VFSGNJN using the same input for both operands.
+ def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
+ (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+ vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (vti.Vector vti.RegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (riscv_fneg_vl vti.RegClass:$rs2,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ srcvalue,
+ (vti.Mask true_mask),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+ (vti.Vector (IMPLICIT_DEF)),
+ vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+ def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+ (SplatFPOp vti.ScalarRegClass:$rs2),
+ vti.RegClass:$passthru,
+ (vti.Mask VMV0:$vm),
+ VLOpFrag),
+ (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+ vti.RegClass:$passthru, vti.RegClass:$rs1,
+ vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+ TAIL_AGNOSTIC)>;
+ }
+ }
} // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
new file mode 100644
index 0000000000000..9cfed6a659c64
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs)
+ ret <2 x bfloat> %r
+}
+
+define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs)
+ ret <4 x bfloat> %r
+}
+
+define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs)
+ ret <8 x bfloat> %r
+}
+
+define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs)
+ ret <16 x bfloat> %r
+}
+
+define <32 x bfloat> @copysign_v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %r = call <32 x bfloat> @llvm.copysign.v32bf16(<32 x bfloat> %vm, <32 x bfloat> %vs)
+ ret <32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
index a2178e1c571da..967ca092fe3c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -1,8 +1,180 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN: -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+
+declare <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat>, <4 x bfloat>, <4 x i1>, i32)
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat>, <8 x bfloat>, <8 x i1>, i32)
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat>, <16 x bfloat>, <16 x i1>, i32)
+
+define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfsgnj_vv_v16bf16_unmasked(<16 x bfloat> %va, <16 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> splat (i1 true), i32 %evl)
+ ret <16 x bfloat> %v
+}
declare <2 x half> @llvm.vp.copysign.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
@@ -311,10 +483,10 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
; CHECK-NEXT: mv a0, a2
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v7, v0, 2
-; CHECK-NEXT: bltu a2, a1, .LBB26_2
+; CHECK-NEXT: bltu a2, a1, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t
; CHECK-NEXT: addi a0, a2, -16
@@ -346,10 +518,10 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
; CHECK-NEXT: vle64.v v0, (a0)
; CHECK-NEXT: li a1, 16
; CHECK-NEXT: mv a0, a2
-; CHECK-NEXT: bltu a2, a1, .LBB27_2
+; CHECK-NEXT: bltu a2, a1, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a0, 16
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfsgnj.vv v8, v8, v0
; CHECK-NEXT: addi a0, a2, -16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
new file mode 100644
index 0000000000000..1907e2a689a4b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+
+define <1 x bfloat> @v1bf16(<1 x bfloat> %v) {
+; ZVFH-LABEL: v1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v)
+ ret <1 x bfloat> %r
+}
+
+define <2 x bfloat> @v2bf16(<2 x bfloat> %v) {
+; ZVFH-LABEL: v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v)
+ ret <2 x bfloat> %r
+}
+
+define <4 x bfloat> @v4bf16(<4 x bfloat> %v) {
+; ZVFH-LABEL: v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v)
+ ret <4 x bfloat> %r
+}
+
+define <8 x bfloat> @v8bf16(<8 x bfloat> %v) {
+; ZVFH-LABEL: v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v)
+ ret <8 x bfloat> %r
+}
+
+define <16 x bfloat> @v16bf16(<16 x bfloat> %v) {
+; ZVFH-LABEL: v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v)
+ ret <16 x bfloat> %r
+}
+
+define <32 x bfloat> @v32bf16(<32 x bfloat> %v) {
+; ZVFH-LABEL: v32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: li a1, 32
+; ZVFH-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: li a1, 32
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: li a0, 32
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v)
+ ret <32 x bfloat> %r
+}
+
+declare <1 x half> @llvm.fabs.v1f16(<1 x half>)
+
+define <1 x half> @vfabs_v1f16(<1 x half> %v) {
+; ZVFH-LABEL: vfabs_v1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_v1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_v1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %r = call <1 x half> @llvm.fabs.v1f16(<1 x half> %v)
+ ret <1 x half> %r
+}
+
+declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
+
+define <2 x half> @vfabs_v2f16(<2 x half> %v) {
+; ZVFH-LABEL: vfabs_v2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_v2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %r = call <2 x half> @llvm.fabs.v2f16(<2 x half> %v)
+ ret <2 x half> %r
+}
+
+declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
+
+define <4 x half> @vfabs_v4f16(<4 x half> %v) {
+; ZVFH-LABEL: vfabs_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_v4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %r = call <4 x half> @llvm.fabs.v4f16(<4 x half> %v)
+ ret <4 x half> %r
+}
+
+declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
+
+define <8 x half> @vfabs_v8f16(<8 x half> %v) {
+; ZVFH-LABEL: vfabs_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_v8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %r = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
+ ret <8 x half> %r
+}
+
+declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
+
+define <16 x half> @vfabs_v16f16(<16 x half> %v) {
+; ZVFH-LABEL: vfabs_v16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_v16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_v16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %r = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
+ ret <16 x half> %r
+}
+
+declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
+
+define <32 x half> @vfabs_v32f16(<32 x half> %v) {
+; ZVFH-LABEL: vfabs_v32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a0, 32
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfabs.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_v32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: li a1, 32
+; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_v32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: li a1, 32
+; ZVFBFA-NEXT: vsetvli zero, a1, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %r = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
+ ret <32 x half> %r
+}
+
+declare <1 x float> @llvm.fabs.v1f32(<1 x float>)
+
+define <1 x float> @vfabs_v1f32(<1 x float> %v) {
+; CHECK-LABEL: vfabs_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <1 x float> @llvm.fabs.v1f32(<1 x float> %v)
+ ret <1 x float> %r
+}
+
+declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
+
+define <2 x float> @vfabs_v2f32(<2 x float> %v) {
+; CHECK-LABEL: vfabs_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %v)
+ ret <2 x float> %r
+}
+
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+
+define <4 x float> @vfabs_v4f32(<4 x float> %v) {
+; CHECK-LABEL: vfabs_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %v)
+ ret <4 x float> %r
+}
+
+declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
+
+define <8 x float> @vfabs_v8f32(<8 x float> %v) {
+; CHECK-LABEL: vfabs_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <8 x float> @llvm.fabs.v8f32(<8 x float> %v)
+ ret <8 x float> %r
+}
+
+declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
+
+define <16 x float> @vfabs_v16f32(<16 x float> %v) {
+; CHECK-LABEL: vfabs_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <16 x float> @llvm.fabs.v16f32(<16 x float> %v)
+ ret <16 x float> %r
+}
+
+declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
+
+define <1 x double> @vfabs_v1f64(<1 x double> %v) {
+; CHECK-LABEL: vfabs_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <1 x double> @llvm.fabs.v1f64(<1 x double> %v)
+ ret <1 x double> %r
+}
+
+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+define <2 x double> @vfabs_v2f64(<2 x double> %v) {
+; CHECK-LABEL: vfabs_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <2 x double> @llvm.fabs.v2f64(<2 x double> %v)
+ ret <2 x double> %r
+}
+
+declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
+
+define <4 x double> @vfabs_v4f64(<4 x double> %v) {
+; CHECK-LABEL: vfabs_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <4 x double> @llvm.fabs.v4f64(<4 x double> %v)
+ ret <4 x double> %r
+}
+
+declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
+
+define <8 x double> @vfabs_v8f64(<8 x double> %v) {
+; CHECK-LABEL: vfabs_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: ret
+ %r = call <8 x double> @llvm.fabs.v8f64(<8 x double> %v)
+ ret <8 x double> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
index 08f486b601328..8b76236f3491d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
@@ -1,12 +1,232 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+declare <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfabs_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfabs_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat>, <4 x i1>, i32)
+
+define <4 x bfloat> @vfabs_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfabs_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat>, <8 x i1>, i32)
+
+define <8 x bfloat> @vfabs_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfabs_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat>, <16 x i1>, i32)
+
+define <16 x bfloat> @vfabs_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfabs_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_v16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_v16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl)
+ ret <16 x bfloat> %v
+}
declare <2 x half> @llvm.vp.fabs.v2f16(<2 x half>, <2 x i1>, i32)
@@ -24,6 +244,14 @@ define <2 x half> @vfabs_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
@@ -42,6 +270,14 @@ define <2 x half> @vfabs_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fabs.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -62,6 +298,14 @@ define <4 x half> @vfabs_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
@@ -80,6 +324,14 @@ define <4 x half> @vfabs_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fabs.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -100,6 +352,14 @@ define <8 x half> @vfabs_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
@@ -118,6 +378,14 @@ define <8 x half> @vfabs_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fabs.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -138,6 +406,14 @@ define <16 x half> @vfabs_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
@@ -156,6 +432,14 @@ define <16 x half> @vfabs_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_v16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fabs.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -367,10 +651,10 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
+; CHECK-NEXT: bltu a0, a2, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
@@ -390,10 +674,10 @@ define <32 x double> @vfabs_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB27_2
+; CHECK-NEXT: bltu a0, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
new file mode 100644
index 0000000000000..e02b3631d588f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+
+define <1 x bfloat> @v1bf16(<1 x bfloat> %va) {
+; ZVFH-LABEL: v1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %vb = fneg <1 x bfloat> %va
+ ret <1 x bfloat> %vb
+}
+
+define <2 x bfloat> @v2bf16(<2 x bfloat> %va) {
+; ZVFH-LABEL: v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %vb = fneg <2 x bfloat> %va
+ ret <2 x bfloat> %vb
+}
+
+define <4 x bfloat> @v4bf16(<4 x bfloat> %va) {
+; ZVFH-LABEL: v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %vb = fneg <4 x bfloat> %va
+ ret <4 x bfloat> %vb
+}
+
+define <8 x bfloat> @v8bf16(<8 x bfloat> %va) {
+; ZVFH-LABEL: v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %vb = fneg <8 x bfloat> %va
+ ret <8 x bfloat> %vb
+}
+
+define <16 x bfloat> @v16bf16(<16 x bfloat> %va) {
+; ZVFH-LABEL: v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %vb = fneg <16 x bfloat> %va
+ ret <16 x bfloat> %vb
+}
+
+define <32 x bfloat> @v32bf16(<32 x bfloat> %va) {
+; ZVFH-LABEL: v32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a0, 32
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: v32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: li a0, 32
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: v32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: li a0, 32
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %vb = fneg <32 x bfloat> %va
+ ret <32 x bfloat> %vb
+}
+
+define <1 x half> @vfneg_vv_v1f16(<1 x half> %va) {
+; ZVFH-LABEL: vfneg_vv_v1f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v1f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %vb = fneg <1 x half> %va
+ ret <1 x half> %vb
+}
+
+define <2 x half> @vfneg_vv_v2f16(<2 x half> %va) {
+; ZVFH-LABEL: vfneg_vv_v2f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %vb = fneg <2 x half> %va
+ ret <2 x half> %vb
+}
+
+define <4 x half> @vfneg_vv_v4f16(<4 x half> %va) {
+; ZVFH-LABEL: vfneg_vv_v4f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v4f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %vb = fneg <4 x half> %va
+ ret <4 x half> %vb
+}
+
+define <8 x half> @vfneg_vv_v8f16(<8 x half> %va) {
+; ZVFH-LABEL: vfneg_vv_v8f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v8f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %vb = fneg <8 x half> %va
+ ret <8 x half> %vb
+}
+
+define <16 x half> @vfneg_vv_v16f16(<16 x half> %va) {
+; ZVFH-LABEL: vfneg_vv_v16f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
+ %vb = fneg <16 x half> %va
+ ret <16 x half> %vb
+}
+
+define <32 x half> @vfneg_vv_v32f16(<32 x half> %va) {
+; ZVFH-LABEL: vfneg_vv_v32f16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: li a0, 32
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vfneg.v v8, v8
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v32f16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: li a0, 32
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: li a0, 32
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
+ %vb = fneg <32 x half> %va
+ ret <32 x half> %vb
+}
+
+define <1 x float> @vfneg_vv_v1f32(<1 x float> %va) {
+; CHECK-LABEL: vfneg_vv_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <1 x float> %va
+ ret <1 x float> %vb
+}
+
+define <2 x float> @vfneg_vv_v2f32(<2 x float> %va) {
+; CHECK-LABEL: vfneg_vv_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <2 x float> %va
+ ret <2 x float> %vb
+}
+
+define <4 x float> @vfneg_vv_v4f32(<4 x float> %va) {
+; CHECK-LABEL: vfneg_vv_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <4 x float> %va
+ ret <4 x float> %vb
+}
+
+define <8 x float> @vfneg_vv_v8f32(<8 x float> %va) {
+; CHECK-LABEL: vfneg_vv_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <8 x float> %va
+ ret <8 x float> %vb
+}
+
+define <16 x float> @vfneg_vv_v16f32(<16 x float> %va) {
+; CHECK-LABEL: vfneg_vv_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <16 x float> %va
+ ret <16 x float> %vb
+}
+
+define <1 x double> @vfneg_vv_v1f64(<1 x double> %va) {
+; CHECK-LABEL: vfneg_vv_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <1 x double> %va
+ ret <1 x double> %vb
+}
+
+define <2 x double> @vfneg_vv_v2f64(<2 x double> %va) {
+; CHECK-LABEL: vfneg_vv_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <2 x double> %va
+ ret <2 x double> %vb
+}
+
+define <4 x double> @vfneg_vv_v4f64(<4 x double> %va) {
+; CHECK-LABEL: vfneg_vv_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <4 x double> %va
+ ret <4 x double> %vb
+}
+
+define <8 x double> @vfneg_vv_v8f64(<8 x double> %va) {
+; CHECK-LABEL: vfneg_vv_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: ret
+ %vb = fneg <8 x double> %va
+ ret <8 x double> %vb
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
index 968fd9f9bab80..4bfebc29837d8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
@@ -1,12 +1,216 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+declare <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfneg_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfneg_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat> %va, <2 x i1> splat (i1 true), i32 %evl)
+ ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat>, <4 x i1>, i32)
+
+define <4 x bfloat> @vfneg_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfneg_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat> %va, <4 x i1> splat (i1 true), i32 %evl)
+ ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat>, <8 x i1>, i32)
+
+define <8 x bfloat> @vfneg_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfneg_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat> %va, <8 x i1> splat (i1 true), i32 %evl)
+ ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat>, <16 x i1>, i32)
+
+define <16 x bfloat> @vfneg_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 %evl)
+ ret <16 x bfloat> %v
+}
+
+define <16 x bfloat> @vfneg_vv_v16bf16_unmasked(<16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_v16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_v16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat> %va, <16 x i1> splat (i1 true), i32 %evl)
+ ret <16 x bfloat> %v
+}
declare <2 x half> @llvm.vp.fneg.v2f16(<2 x half>, <2 x i1>, i32)
@@ -23,6 +227,13 @@ define <2 x half> @vfneg_vv_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl)
ret <2 x half> %v
}
@@ -40,6 +251,13 @@ define <2 x half> @vfneg_vv_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <2 x half> @llvm.vp.fneg.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl)
ret <2 x half> %v
}
@@ -59,6 +277,13 @@ define <4 x half> @vfneg_vv_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
ret <4 x half> %v
}
@@ -76,6 +301,13 @@ define <4 x half> @vfneg_vv_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <4 x half> @llvm.vp.fneg.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl)
ret <4 x half> %v
}
@@ -95,6 +327,13 @@ define <8 x half> @vfneg_vv_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl)
ret <8 x half> %v
}
@@ -112,6 +351,13 @@ define <8 x half> @vfneg_vv_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) {
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <8 x half> @llvm.vp.fneg.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl)
ret <8 x half> %v
}
@@ -131,6 +377,13 @@ define <16 x half> @vfneg_vv_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl)
ret <16 x half> %v
}
@@ -148,6 +401,13 @@ define <16 x half> @vfneg_vv_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl)
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_v16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <16 x half> @llvm.vp.fneg.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl)
ret <16 x half> %v
}
@@ -359,10 +619,10 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT: vslidedown.vi v24, v0, 2
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB26_2
+; CHECK-NEXT: bltu a0, a2, .LBB34_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB26_2:
+; CHECK-NEXT: .LBB34_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8, v0.t
; CHECK-NEXT: addi a1, a0, -16
@@ -382,10 +642,10 @@ define <32 x double> @vfneg_vv_v32f64_unmasked(<32 x double> %va, i32 zeroext %e
; CHECK: # %bb.0:
; CHECK-NEXT: li a2, 16
; CHECK-NEXT: mv a1, a0
-; CHECK-NEXT: bltu a0, a2, .LBB27_2
+; CHECK-NEXT: bltu a0, a2, .LBB35_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: li a1, 16
-; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: .LBB35_2:
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: addi a1, a0, -16
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
index ccf82b93d6b75..9b3d73c7acc0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
@@ -1,12 +1,388 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+declare <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv1bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv1bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v12, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v12, v12, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: vor.vv v8, v8, v16, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_nxv32bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v16, v16, a1
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: vor.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfsgnj_vv_nxv32bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a1
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half>, <vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -26,6 +402,16 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -46,6 +432,16 @@ define <vscale x 1 x half> @vfsgnj_vv_nxv1f16_unmasked(<vscale x 1 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv1f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.copysign.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x half> %vb, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -68,6 +464,16 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -88,6 +494,16 @@ define <vscale x 2 x half> @vfsgnj_vv_nxv2f16_unmasked(<vscale x 2 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.copysign.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x half> %vb, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -110,6 +526,16 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v9, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -130,6 +556,16 @@ define <vscale x 4 x half> @vfsgnj_vv_nxv4f16_unmasked(<vscale x 4 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.copysign.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x half> %vb, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -152,6 +588,16 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v10, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v10, v10, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -172,6 +618,16 @@ define <vscale x 8 x half> @vfsgnj_vv_nxv8f16_unmasked(<vscale x 8 x half> %va,
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v10, v10, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.copysign.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x half> %vb, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -194,6 +650,16 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16(<vscale x 16 x half> %va, <vscal
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v12, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v12, v12, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v12, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -214,6 +680,16 @@ define <vscale x 16 x half> @vfsgnj_vv_nxv16f16_unmasked(<vscale x 16 x half> %v
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v12, v12, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.copysign.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x half> %vb, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -236,6 +712,16 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16(<vscale x 32 x half> %va, <vscal
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: vor.vv v8, v8, v16, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v16, v16, a1, v0.t
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: vor.vv v8, v8, v16, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -256,6 +742,16 @@ define <vscale x 32 x half> @vfsgnj_vv_nxv32f16_unmasked(<vscale x 32 x half> %v
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_nxv32f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v16, v16, a1
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.copysign.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
index 1d8638844af7f..28426ad018b83 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll
@@ -11,75 +11,165 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %v) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x bfloat> @llvm.fabs.nxv1bf16(<vscale x 1 x bfloat> %v)
ret <vscale x 1 x bfloat> %r
}
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %v) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %v)
ret <vscale x 2 x bfloat> %r
}
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %v) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %v)
ret <vscale x 4 x bfloat> %r
}
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %v) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %v)
ret <vscale x 8 x bfloat> %r
}
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %v) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> %v)
ret <vscale x 16 x bfloat> %r
}
define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %v) {
-; CHECK-LABEL: nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x bfloat> @llvm.fabs.nxv32bf16(<vscale x 32 x bfloat> %v)
ret <vscale x 32 x bfloat> %r
}
@@ -100,6 +190,14 @@ define <vscale x 1 x half> @vfabs_nxv1f16(<vscale x 1 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x half> @llvm.fabs.nxv1f16(<vscale x 1 x half> %v)
ret <vscale x 1 x half> %r
}
@@ -120,6 +218,14 @@ define <vscale x 2 x half> @vfabs_nxv2f16(<vscale x 2 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x half> @llvm.fabs.nxv2f16(<vscale x 2 x half> %v)
ret <vscale x 2 x half> %r
}
@@ -140,6 +246,14 @@ define <vscale x 4 x half> @vfabs_nxv4f16(<vscale x 4 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x half> @llvm.fabs.nxv4f16(<vscale x 4 x half> %v)
ret <vscale x 4 x half> %r
}
@@ -160,6 +274,14 @@ define <vscale x 8 x half> @vfabs_nxv8f16(<vscale x 8 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x half> @llvm.fabs.nxv8f16(<vscale x 8 x half> %v)
ret <vscale x 8 x half> %r
}
@@ -180,6 +302,14 @@ define <vscale x 16 x half> @vfabs_nxv16f16(<vscale x 16 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x half> @llvm.fabs.nxv16f16(<vscale x 16 x half> %v)
ret <vscale x 16 x half> %r
}
@@ -200,6 +330,14 @@ define <vscale x 32 x half> @vfabs_nxv32f16(<vscale x 32 x half> %v) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x half> @llvm.fabs.nxv32f16(<vscale x 32 x half> %v)
ret <vscale x 32 x half> %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
index 8f9f9c4256c8f..697367e9b432d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
@@ -1,12 +1,340 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+declare <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfabs_vv_nxv32bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: addi a1, a1, -1
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfabs_vv_nxv32bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: addi a1, a1, -1
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfabs.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -24,6 +352,14 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -42,6 +378,14 @@ define <vscale x 1 x half> @vfabs_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv1f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fabs.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -62,6 +406,14 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -80,6 +432,14 @@ define <vscale x 2 x half> @vfabs_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fabs.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -100,6 +460,14 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -118,6 +486,14 @@ define <vscale x 4 x half> @vfabs_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fabs.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -138,6 +514,14 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -156,6 +540,14 @@ define <vscale x 8 x half> @vfabs_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fabs.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -176,6 +568,14 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -194,6 +594,14 @@ define <vscale x 16 x half> @vfabs_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fabs.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -214,6 +622,14 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -232,6 +648,14 @@ define <vscale x 32 x half> @vfabs_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vand.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfabs_vv_nxv32f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: addi a1, a1, -1
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fabs.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -473,10 +897,10 @@ define <vscale x 16 x double> @vfabs_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8, v0.t
@@ -495,10 +919,10 @@ define <vscale x 16 x double> @vfabs_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
index 83f588ce5027d..bef2e8d3b57fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll
@@ -11,87 +11,189 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs)
ret <vscale x 1 x bfloat> %r
}
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs)
ret <vscale x 2 x bfloat> %r
}
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vand.vx v9, v9, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v9
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vand.vx v9, v9, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vand.vx v9, v9, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v9
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs)
ret <vscale x 4 x bfloat> %r
}
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vand.vx v10, v10, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v10
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vand.vx v10, v10, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v10
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vand.vx v10, v10, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v10
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs)
ret <vscale x 8 x bfloat> %r
}
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vand.vx v12, v12, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v12
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vand.vx v12, v12, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v12
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vand.vx v12, v12, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v12
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs)
ret <vscale x 16 x bfloat> %r
}
define <vscale x 32 x bfloat> @nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs) {
-; CHECK-LABEL: nxv32bf32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vand.vx v16, v16, a0
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vand.vx v8, v8, a0
-; CHECK-NEXT: vor.vv v8, v8, v16
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv32bf32:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vand.vx v16, v16, a0
+; ZVFH-NEXT: addi a0, a0, -1
+; ZVFH-NEXT: vand.vx v8, v8, a0
+; ZVFH-NEXT: vor.vv v8, v8, v16
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv32bf32:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vand.vx v16, v16, a0
+; ZVFHMIN-NEXT: addi a0, a0, -1
+; ZVFHMIN-NEXT: vand.vx v8, v8, a0
+; ZVFHMIN-NEXT: vor.vv v8, v8, v16
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv32bf32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfsgnj.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs)
ret <vscale x 32 x bfloat> %r
}
@@ -114,6 +216,16 @@ define <vscale x 1 x half> @vfcopysign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %vs)
ret <vscale x 1 x half> %r
}
@@ -136,6 +248,18 @@ define <vscale x 1 x half> @vfcopysign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %splat)
@@ -159,6 +283,17 @@ define <vscale x 1 x half> @vfcopynsign_vv_nxv1f16(<vscale x 1 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 1 x half> %vs
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %n)
ret <vscale x 1 x half> %r
@@ -183,6 +318,19 @@ define <vscale x 1 x half> @vfcopynsign_vf_nxv1f16(<vscale x 1 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 1 x half> %head, <vscale x 1 x half> poison, <vscale x 1 x i32> zeroinitializer
%n = fneg <vscale x 1 x half> %splat
@@ -208,6 +356,17 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vand.vx v9, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 1 x float> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
ret <vscale x 1 x half> %r
@@ -235,6 +394,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v9, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
%esplat = fptrunc <vscale x 1 x float> %splat to <vscale x 1 x half>
@@ -261,6 +433,18 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vxor.vx v9, v10, a0
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 1 x float> %vs
%eneg = fptrunc <vscale x 1 x float> %n to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %eneg)
@@ -290,6 +474,20 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v9
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v9, v10, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 1 x float> %head, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
%n = fneg <vscale x 1 x float> %splat
@@ -320,6 +518,19 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 1 x double> %vs to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %e)
ret <vscale x 1 x half> %r
@@ -351,6 +562,21 @@ define <vscale x 1 x half> @vfcopysign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1 x
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
%esplat = fptrunc <vscale x 1 x double> %splat to <vscale x 1 x half>
@@ -381,6 +607,20 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 1 x double> %vs
%eneg = fptrunc <vscale x 1 x double> %n to <vscale x 1 x half>
%r = call <vscale x 1 x half> @llvm.copysign.nxv1f16(<vscale x 1 x half> %vm, <vscale x 1 x half> %eneg)
@@ -414,6 +654,22 @@ define <vscale x 1 x half> @vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64(<vscale x 1
; ZVFHMIN-NEXT: vand.vx v9, v9, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv1f16_nxv1f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v9, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v10, v9
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vfncvt.f.f.w v9, v10
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 1 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 1 x double> %head, <vscale x 1 x double> poison, <vscale x 1 x i32> zeroinitializer
%n = fneg <vscale x 1 x double> %splat
@@ -440,6 +696,16 @@ define <vscale x 2 x half> @vfcopysign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %vs)
ret <vscale x 2 x half> %r
}
@@ -462,6 +728,18 @@ define <vscale x 2 x half> @vfcopysign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %splat)
@@ -485,6 +763,17 @@ define <vscale x 2 x half> @vfcopynsign_vv_nxv2f16(<vscale x 2 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 2 x half> %vs
%r = call <vscale x 2 x half> @llvm.copysign.nxv2f16(<vscale x 2 x half> %vm, <vscale x 2 x half> %n)
ret <vscale x 2 x half> %r
@@ -509,6 +798,19 @@ define <vscale x 2 x half> @vfcopynsign_vf_nxv2f16(<vscale x 2 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 2 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 2 x half> %head, <vscale x 2 x half> poison, <vscale x 2 x i32> zeroinitializer
%n = fneg <vscale x 2 x half> %splat
@@ -534,6 +836,16 @@ define <vscale x 4 x half> @vfcopysign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %vs)
ret <vscale x 4 x half> %r
}
@@ -556,6 +868,18 @@ define <vscale x 4 x half> @vfcopysign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %splat)
@@ -579,6 +903,17 @@ define <vscale x 4 x half> @vfcopynsign_vv_nxv4f16(<vscale x 4 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v9, v9, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 4 x half> %vs
%r = call <vscale x 4 x half> @llvm.copysign.nxv4f16(<vscale x 4 x half> %vm, <vscale x 4 x half> %n)
ret <vscale x 4 x half> %r
@@ -603,6 +938,19 @@ define <vscale x 4 x half> @vfcopynsign_vf_nxv4f16(<vscale x 4 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v9, v9, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v9
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v9, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v9, v9, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v9, v9, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v9
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 4 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 4 x half> %head, <vscale x 4 x half> poison, <vscale x 4 x i32> zeroinitializer
%n = fneg <vscale x 4 x half> %splat
@@ -628,6 +976,16 @@ define <vscale x 8 x half> @vfcopysign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsca
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %vs)
ret <vscale x 8 x half> %r
}
@@ -650,6 +1008,18 @@ define <vscale x 8 x half> @vfcopysign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v10, v10, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v10, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %splat)
@@ -673,6 +1043,17 @@ define <vscale x 8 x half> @vfcopynsign_vv_nxv8f16(<vscale x 8 x half> %vm, <vsc
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 8 x half> %vs
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %n)
ret <vscale x 8 x half> %r
@@ -697,6 +1078,19 @@ define <vscale x 8 x half> @vfcopynsign_vf_nxv8f16(<vscale x 8 x half> %vm, half
; ZVFHMIN-NEXT: vand.vx v10, v10, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v10, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 8 x half> %head, <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
%n = fneg <vscale x 8 x half> %splat
@@ -722,6 +1116,17 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 8 x float> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
ret <vscale x 8 x half> %r
@@ -749,6 +1154,19 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
%esplat = fptrunc <vscale x 8 x float> %splat to <vscale x 8 x half>
@@ -775,6 +1193,18 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 8 x float> %vs
%eneg = fptrunc <vscale x 8 x float> %n to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %eneg)
@@ -804,6 +1234,20 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f32:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v12, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x float> poison, float %s, i32 0
%splat = shufflevector <vscale x 8 x float> %head, <vscale x 8 x float> poison, <vscale x 8 x i32> zeroinitializer
%n = fneg <vscale x 8 x float> %splat
@@ -834,6 +1278,19 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vv_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%e = fptrunc <vscale x 8 x double> %vs to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %e)
ret <vscale x 8 x half> %r
@@ -865,6 +1322,21 @@ define <vscale x 8 x half> @vfcopysign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8 x
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_exttrunc_vf_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
%esplat = fptrunc <vscale x 8 x double> %splat to <vscale x 8 x half>
@@ -895,6 +1367,20 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vv_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 8 x double> %vs
%eneg = fptrunc <vscale x 8 x double> %n to <vscale x 8 x half>
%r = call <vscale x 8 x half> @llvm.copysign.nxv8f16(<vscale x 8 x half> %vm, <vscale x 8 x half> %eneg)
@@ -928,6 +1414,22 @@ define <vscale x 8 x half> @vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64(<vscale x 8
; ZVFHMIN-NEXT: vand.vx v10, v10, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v10
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_exttrunc_vf_nxv8f16_nxv8f64:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; ZVFBFA-NEXT: vfmv.v.f v16, fa0
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: addi a1, a0, -1
+; ZVFBFA-NEXT: vsetvli zero, zero, e32, m4, ta, ma
+; ZVFBFA-NEXT: vfncvt.rod.f.f.w v12, v16
+; ZVFBFA-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vand.vx v8, v8, a1
+; ZVFBFA-NEXT: vfncvt.f.f.w v10, v12
+; ZVFBFA-NEXT: vxor.vx v10, v10, a0
+; ZVFBFA-NEXT: vand.vx v10, v10, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v10
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 8 x double> poison, double %s, i32 0
%splat = shufflevector <vscale x 8 x double> %head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
%n = fneg <vscale x 8 x double> %splat
@@ -954,6 +1456,16 @@ define <vscale x 16 x half> @vfcopysign_vv_nxv16f16(<vscale x 16 x half> %vm, <v
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vand.vx v12, v12, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %vs)
ret <vscale x 16 x half> %r
}
@@ -976,6 +1488,18 @@ define <vscale x 16 x half> @vfcopysign_vf_nxv16f16(<vscale x 16 x half> %vm, ha
; ZVFHMIN-NEXT: vand.vx v12, v12, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v12, v12, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %splat)
@@ -999,6 +1523,17 @@ define <vscale x 16 x half> @vfcopynsign_vv_nxv16f16(<vscale x 16 x half> %vm, <
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v12, v12, a0
+; ZVFBFA-NEXT: vand.vx v12, v12, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 16 x half> %vs
%r = call <vscale x 16 x half> @llvm.copysign.nxv16f16(<vscale x 16 x half> %vm, <vscale x 16 x half> %n)
ret <vscale x 16 x half> %r
@@ -1023,6 +1558,19 @@ define <vscale x 16 x half> @vfcopynsign_vf_nxv16f16(<vscale x 16 x half> %vm, h
; ZVFHMIN-NEXT: vand.vx v12, v12, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v12
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v12, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v12, v12, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v12, v12, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v12
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 16 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 16 x half> %head, <vscale x 16 x half> poison, <vscale x 16 x i32> zeroinitializer
%n = fneg <vscale x 16 x half> %splat
@@ -1048,6 +1596,16 @@ define <vscale x 32 x half> @vfcopysign_vv_nxv32f16(<vscale x 32 x half> %vm, <v
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vand.vx v16, v16, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %vs)
ret <vscale x 32 x half> %r
}
@@ -1070,6 +1628,18 @@ define <vscale x 32 x half> @vfcopysign_vf_nxv32f16(<vscale x 32 x half> %vm, ha
; ZVFHMIN-NEXT: vand.vx v16, v16, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopysign_vf_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v16, v16, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %splat)
@@ -1093,6 +1663,17 @@ define <vscale x 32 x half> @vfcopynsign_vv_nxv32f16(<vscale x 32 x half> %vm, <
; ZVFHMIN-NEXT: vand.vx v8, v8, a0
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v16, v16, a0
+; ZVFBFA-NEXT: vand.vx v16, v16, a0
+; ZVFBFA-NEXT: addi a0, a0, -1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%n = fneg <vscale x 32 x half> %vs
%r = call <vscale x 32 x half> @llvm.copysign.nxv32f16(<vscale x 32 x half> %vm, <vscale x 32 x half> %n)
ret <vscale x 32 x half> %r
@@ -1117,6 +1698,19 @@ define <vscale x 32 x half> @vfcopynsign_vf_nxv32f16(<vscale x 32 x half> %vm, h
; ZVFHMIN-NEXT: vand.vx v16, v16, a1
; ZVFHMIN-NEXT: vor.vv v8, v8, v16
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfcopynsign_vf_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: fmv.x.h a0, fa0
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli a2, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vmv.v.x v16, a0
+; ZVFBFA-NEXT: addi a0, a1, -1
+; ZVFBFA-NEXT: vxor.vx v16, v16, a1
+; ZVFBFA-NEXT: vand.vx v8, v8, a0
+; ZVFBFA-NEXT: vand.vx v16, v16, a1
+; ZVFBFA-NEXT: vor.vv v8, v8, v16
+; ZVFBFA-NEXT: ret
%head = insertelement <vscale x 32 x half> poison, half %s, i32 0
%splat = shufflevector <vscale x 32 x half> %head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
%n = fneg <vscale x 32 x half> %splat
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
index 9f456e97be11d..c0b4916a54e51 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll
@@ -11,69 +11,153 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
; RUN: --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
+; RUN: --check-prefixes=CHECK,ZVFBFA
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %va) {
-; CHECK-LABEL: nxv1bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 1 x bfloat> %va
ret <vscale x 1 x bfloat> %vb
}
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %va) {
-; CHECK-LABEL: nxv2bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 2 x bfloat> %va
ret <vscale x 2 x bfloat> %vb
}
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %va) {
-; CHECK-LABEL: nxv4bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 4 x bfloat> %va
ret <vscale x 4 x bfloat> %vb
}
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %va) {
-; CHECK-LABEL: nxv8bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 8 x bfloat> %va
ret <vscale x 8 x bfloat> %vb
}
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %va) {
-; CHECK-LABEL: nxv16bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 16 x bfloat> %va
ret <vscale x 16 x bfloat> %vb
}
define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %va) {
-; CHECK-LABEL: nxv32bf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 8
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vxor.vx v8, v8, a0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a0, 8
+; ZVFH-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a0, 8
+; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli a0, zero, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 32 x bfloat> %va
ret <vscale x 32 x bfloat> %vb
}
@@ -91,6 +175,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 1 x half> %va
ret <vscale x 1 x half> %vb
}
@@ -108,6 +199,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 2 x half> %va
ret <vscale x 2 x half> %vb
}
@@ -125,6 +223,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 4 x half> %va
ret <vscale x 4 x half> %vb
}
@@ -142,6 +247,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 8 x half> %va
ret <vscale x 8 x half> %vb
}
@@ -159,6 +271,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 16 x half> %va
ret <vscale x 16 x half> %vb
}
@@ -176,6 +295,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va) {
; ZVFHMIN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a0, 8
+; ZVFBFA-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a0
+; ZVFBFA-NEXT: ret
%vb = fneg <vscale x 32 x half> %va
ret <vscale x 32 x half> %vb
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
index bbab056f0ff46..80bb0c70cde29 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
@@ -1,12 +1,316 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=ilp32d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
+
+declare <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv1bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 1 x bfloat> %v
+}
+
+declare <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv2bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv2bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv2bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 2 x bfloat> %v
+}
+
+declare <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
+
+define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv4bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv4bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv4bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 4 x bfloat> %v
+}
+
+declare <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
+
+define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv8bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv8bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv8bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 8 x bfloat> %v
+}
+
+declare <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
+
+define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv16bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv16bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv16bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 16 x bfloat> %v
+}
+
+declare <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
+
+define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv32bf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32bf16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8, v0.t
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
+
+define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16_unmasked(<vscale x 32 x bfloat> %va, i32 zeroext %evl) {
+; ZVFH-LABEL: vfneg_vv_nxv32bf16_unmasked:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: lui a1, 8
+; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFH-NEXT: vxor.vx v8, v8, a1
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vfneg_vv_nxv32bf16_unmasked:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: lui a1, 8
+; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
+; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32bf16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
+; ZVFBFA-NEXT: vfneg.v v8, v8
+; ZVFBFA-NEXT: ret
+ %v = call <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
+ ret <vscale x 32 x bfloat> %v
+}
declare <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -23,6 +327,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16(<vscale x 1 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> %m, i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -40,6 +351,13 @@ define <vscale x 1 x half> @vfneg_vv_nxv1f16_unmasked(<vscale x 1 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv1f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 1 x half> @llvm.vp.fneg.nxv1f16(<vscale x 1 x half> %va, <vscale x 1 x i1> splat (i1 true), i32 %evl)
ret <vscale x 1 x half> %v
}
@@ -59,6 +377,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16(<vscale x 2 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -76,6 +401,13 @@ define <vscale x 2 x half> @vfneg_vv_nxv2f16_unmasked(<vscale x 2 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv2f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 2 x half> @llvm.vp.fneg.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> splat (i1 true), i32 %evl)
ret <vscale x 2 x half> %v
}
@@ -95,6 +427,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16(<vscale x 4 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> %m, i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -112,6 +451,13 @@ define <vscale x 4 x half> @vfneg_vv_nxv4f16_unmasked(<vscale x 4 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv4f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m1, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 4 x half> @llvm.vp.fneg.nxv4f16(<vscale x 4 x half> %va, <vscale x 4 x i1> splat (i1 true), i32 %evl)
ret <vscale x 4 x half> %v
}
@@ -131,6 +477,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16(<vscale x 8 x half> %va, <vscale x
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> %m, i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -148,6 +501,13 @@ define <vscale x 8 x half> @vfneg_vv_nxv8f16_unmasked(<vscale x 8 x half> %va, i
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv8f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m2, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 8 x half> @llvm.vp.fneg.nxv8f16(<vscale x 8 x half> %va, <vscale x 8 x i1> splat (i1 true), i32 %evl)
ret <vscale x 8 x half> %v
}
@@ -167,6 +527,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16(<vscale x 16 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -184,6 +551,13 @@ define <vscale x 16 x half> @vfneg_vv_nxv16f16_unmasked(<vscale x 16 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv16f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 16 x half> @llvm.vp.fneg.nxv16f16(<vscale x 16 x half> %va, <vscale x 16 x i1> splat (i1 true), i32 %evl)
ret <vscale x 16 x half> %v
}
@@ -203,6 +577,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16(<vscale x 32 x half> %va, <vscale
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1, v0.t
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1, v0.t
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -220,6 +601,13 @@ define <vscale x 32 x half> @vfneg_vv_nxv32f16_unmasked(<vscale x 32 x half> %va
; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m8, ta, ma
; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
; ZVFHMIN-NEXT: ret
+;
+; ZVFBFA-LABEL: vfneg_vv_nxv32f16_unmasked:
+; ZVFBFA: # %bb.0:
+; ZVFBFA-NEXT: lui a1, 8
+; ZVFBFA-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; ZVFBFA-NEXT: vxor.vx v8, v8, a1
+; ZVFBFA-NEXT: ret
%v = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> splat (i1 true), i32 %evl)
ret <vscale x 32 x half> %v
}
@@ -461,10 +849,10 @@ define <vscale x 16 x double> @vfneg_vv_nxv16f64(<vscale x 16 x double> %va, <vs
; CHECK-NEXT: and a2, a2, a3
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v16, v16, v0.t
-; CHECK-NEXT: bltu a0, a1, .LBB32_2
+; CHECK-NEXT: bltu a0, a1, .LBB44_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB32_2:
+; CHECK-NEXT: .LBB44_2:
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8, v0.t
@@ -483,10 +871,10 @@ define <vscale x 16 x double> @vfneg_vv_nxv16f64_unmasked(<vscale x 16 x double>
; CHECK-NEXT: and a2, a3, a2
; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v16, v16
-; CHECK-NEXT: bltu a0, a1, .LBB33_2
+; CHECK-NEXT: bltu a0, a1, .LBB45_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: mv a0, a1
-; CHECK-NEXT: .LBB33_2:
+; CHECK-NEXT: .LBB45_2:
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
>From bada08fe136356bb737e95e011ef01044a25fa0a Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Fri, 7 Nov 2025 07:20:12 -0800
Subject: [PATCH 2/3] fixup! clang-format, keep only bf16 tests
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 +-
.../RISCV/rvv/fixed-vectors-vfabs-sdnode.ll | 459 ++----------------
.../RISCV/rvv/fixed-vectors-vfneg-sdnode.ll | 405 ++--------------
3 files changed, 71 insertions(+), 800 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4bb83d1f600fb..155449018af65 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -88,10 +88,9 @@ static cl::opt<bool>
cl::init(true));
// TODO: Support more ops
-static const unsigned ZvfbfaVPOps[] = {
- ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
-static const unsigned ZvfbfaOps[] = {
- ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+static const unsigned ZvfbfaVPOps[] = {ISD::VP_FNEG, ISD::VP_FABS,
+ ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
index 1907e2a689a4b..27c00de3c3487 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll
@@ -1,457 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
-; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
-; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
-; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
-; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
-; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFBFA
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
-; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
define <1 x bfloat> @v1bf16(<1 x bfloat> %v) {
-; ZVFH-LABEL: v1bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: addi a0, a0, -1
-; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFH-NEXT: vand.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v1bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v1bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfabs.v v8, v8
-; ZVFBFA-NEXT: ret
- %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v)
- ret <1 x bfloat> %r
-}
-
-define <2 x bfloat> @v2bf16(<2 x bfloat> %v) {
-; ZVFH-LABEL: v2bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: addi a0, a0, -1
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFH-NEXT: vand.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v2bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v2bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfabs.v v8, v8
-; ZVFBFA-NEXT: ret
- %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v)
- ret <2 x bfloat> %r
-}
-
-define <4 x bfloat> @v4bf16(<4 x bfloat> %v) {
-; ZVFH-LABEL: v4bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: addi a0, a0, -1
-; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT: vand.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v4bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v4bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfabs.v v8, v8
-; ZVFBFA-NEXT: ret
- %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v)
- ret <4 x bfloat> %r
-}
-
-define <8 x bfloat> @v8bf16(<8 x bfloat> %v) {
-; ZVFH-LABEL: v8bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: addi a0, a0, -1
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT: vand.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v8bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v8bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfabs.v v8, v8
-; ZVFBFA-NEXT: ret
- %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v)
- ret <8 x bfloat> %r
-}
-
-define <16 x bfloat> @v16bf16(<16 x bfloat> %v) {
-; ZVFH-LABEL: v16bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: addi a0, a0, -1
-; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT: vand.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v16bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v16bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfabs.v v8, v8
-; ZVFBFA-NEXT: ret
- %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v)
- ret <16 x bfloat> %r
-}
-
-define <32 x bfloat> @v32bf16(<32 x bfloat> %v) {
-; ZVFH-LABEL: v32bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: addi a0, a0, -1
-; ZVFH-NEXT: li a1, 32
-; ZVFH-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFH-NEXT: vand.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v32bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: li a1, 32
-; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v32bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: li a0, 32
-; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfabs.v v8, v8
-; ZVFBFA-NEXT: ret
- %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v)
- ret <32 x bfloat> %r
-}
-
-declare <1 x half> @llvm.fabs.v1f16(<1 x half>)
-
-define <1 x half> @vfabs_v1f16(<1 x half> %v) {
-; ZVFH-LABEL: vfabs_v1f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfabs_v1f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfabs_v1f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: addi a0, a0, -1
-; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFBFA-NEXT: vand.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %r = call <1 x half> @llvm.fabs.v1f16(<1 x half> %v)
- ret <1 x half> %r
-}
-
-declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
-
-define <2 x half> @vfabs_v2f16(<2 x half> %v) {
-; ZVFH-LABEL: vfabs_v2f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfabs_v2f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfabs_v2f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: addi a0, a0, -1
-; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFBFA-NEXT: vand.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %r = call <2 x half> @llvm.fabs.v2f16(<2 x half> %v)
- ret <2 x half> %r
-}
-
-declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
-
-define <4 x half> @vfabs_v4f16(<4 x half> %v) {
-; ZVFH-LABEL: vfabs_v4f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfabs_v4f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfabs_v4f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: addi a0, a0, -1
-; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFBFA-NEXT: vand.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %r = call <4 x half> @llvm.fabs.v4f16(<4 x half> %v)
- ret <4 x half> %r
-}
-
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-
-define <8 x half> @vfabs_v8f16(<8 x half> %v) {
-; ZVFH-LABEL: vfabs_v8f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfabs_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfabs_v8f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: addi a0, a0, -1
-; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFBFA-NEXT: vand.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %r = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v)
- ret <8 x half> %r
-}
-
-declare <16 x half> @llvm.fabs.v16f16(<16 x half>)
-
-define <16 x half> @vfabs_v16f16(<16 x half> %v) {
-; ZVFH-LABEL: vfabs_v16f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfabs_v16f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfabs_v16f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: addi a0, a0, -1
-; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFBFA-NEXT: vand.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %r = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v)
- ret <16 x half> %r
-}
-
-declare <32 x half> @llvm.fabs.v32f16(<32 x half>)
-
-define <32 x half> @vfabs_v32f16(<32 x half> %v) {
-; ZVFH-LABEL: vfabs_v32f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: li a0, 32
-; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfabs_v32f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: addi a0, a0, -1
-; ZVFHMIN-NEXT: li a1, 32
-; ZVFHMIN-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vand.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfabs_v32f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: addi a0, a0, -1
-; ZVFBFA-NEXT: li a1, 32
-; ZVFBFA-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; ZVFBFA-NEXT: vand.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %r = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v)
- ret <32 x half> %r
-}
-
-declare <1 x float> @llvm.fabs.v1f32(<1 x float>)
-
-define <1 x float> @vfabs_v1f32(<1 x float> %v) {
-; CHECK-LABEL: vfabs_v1f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: ret
- %r = call <1 x float> @llvm.fabs.v1f32(<1 x float> %v)
- ret <1 x float> %r
-}
-
-declare <2 x float> @llvm.fabs.v2f32(<2 x float>)
-
-define <2 x float> @vfabs_v2f32(<2 x float> %v) {
-; CHECK-LABEL: vfabs_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: ret
- %r = call <2 x float> @llvm.fabs.v2f32(<2 x float> %v)
- ret <2 x float> %r
-}
-
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
-
-define <4 x float> @vfabs_v4f32(<4 x float> %v) {
-; CHECK-LABEL: vfabs_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfabs.v v8, v8
-; CHECK-NEXT: ret
- %r = call <4 x float> @llvm.fabs.v4f32(<4 x float> %v)
- ret <4 x float> %r
-}
-
-declare <8 x float> @llvm.fabs.v8f32(<8 x float>)
-
-define <8 x float> @vfabs_v8f32(<8 x float> %v) {
-; CHECK-LABEL: vfabs_v8f32:
+; CHECK-LABEL: v1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
- %r = call <8 x float> @llvm.fabs.v8f32(<8 x float> %v)
- ret <8 x float> %r
+ %r = call <1 x bfloat> @llvm.fabs.v1bf16(<1 x bfloat> %v)
+ ret <1 x bfloat> %r
}
-declare <16 x float> @llvm.fabs.v16f32(<16 x float>)
-
-define <16 x float> @vfabs_v16f32(<16 x float> %v) {
-; CHECK-LABEL: vfabs_v16f32:
+define <2 x bfloat> @v2bf16(<2 x bfloat> %v) {
+; CHECK-LABEL: v2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
- %r = call <16 x float> @llvm.fabs.v16f32(<16 x float> %v)
- ret <16 x float> %r
+ %r = call <2 x bfloat> @llvm.fabs.v2bf16(<2 x bfloat> %v)
+ ret <2 x bfloat> %r
}
-declare <1 x double> @llvm.fabs.v1f64(<1 x double>)
-
-define <1 x double> @vfabs_v1f64(<1 x double> %v) {
-; CHECK-LABEL: vfabs_v1f64:
+define <4 x bfloat> @v4bf16(<4 x bfloat> %v) {
+; CHECK-LABEL: v4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
- %r = call <1 x double> @llvm.fabs.v1f64(<1 x double> %v)
- ret <1 x double> %r
+ %r = call <4 x bfloat> @llvm.fabs.v4bf16(<4 x bfloat> %v)
+ ret <4 x bfloat> %r
}
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-
-define <2 x double> @vfabs_v2f64(<2 x double> %v) {
-; CHECK-LABEL: vfabs_v2f64:
+define <8 x bfloat> @v8bf16(<8 x bfloat> %v) {
+; CHECK-LABEL: v8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
- %r = call <2 x double> @llvm.fabs.v2f64(<2 x double> %v)
- ret <2 x double> %r
+ %r = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %v)
+ ret <8 x bfloat> %r
}
-declare <4 x double> @llvm.fabs.v4f64(<4 x double>)
-
-define <4 x double> @vfabs_v4f64(<4 x double> %v) {
-; CHECK-LABEL: vfabs_v4f64:
+define <16 x bfloat> @v16bf16(<16 x bfloat> %v) {
+; CHECK-LABEL: v16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
- %r = call <4 x double> @llvm.fabs.v4f64(<4 x double> %v)
- ret <4 x double> %r
+ %r = call <16 x bfloat> @llvm.fabs.v16bf16(<16 x bfloat> %v)
+ ret <16 x bfloat> %r
}
-declare <8 x double> @llvm.fabs.v8f64(<8 x double>)
-
-define <8 x double> @vfabs_v8f64(<8 x double> %v) {
-; CHECK-LABEL: vfabs_v8f64:
+define <32 x bfloat> @v32bf16(<32 x bfloat> %v) {
+; CHECK-LABEL: v32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
; CHECK-NEXT: vfabs.v v8, v8
; CHECK-NEXT: ret
- %r = call <8 x double> @llvm.fabs.v8f64(<8 x double> %v)
- ret <8 x double> %r
+ %r = call <32 x bfloat> @llvm.fabs.v32bf16(<32 x bfloat> %v)
+ ret <32 x bfloat> %r
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
index e02b3631d588f..b3b9a62600f46 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll
@@ -1,403 +1,66 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
-; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
-; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
-; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+zvfbfmin,+v \
-; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
-; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFBFA
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v \
-; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s \
-; RUN: --check-prefixes=CHECK,ZVFBFA
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvfbfa,+v \
+; RUN: -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
define <1 x bfloat> @v1bf16(<1 x bfloat> %va) {
-; ZVFH-LABEL: v1bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFH-NEXT: vxor.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v1bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v1bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfneg.v v8, v8
-; ZVFBFA-NEXT: ret
- %vb = fneg <1 x bfloat> %va
- ret <1 x bfloat> %vb
-}
-
-define <2 x bfloat> @v2bf16(<2 x bfloat> %va) {
-; ZVFH-LABEL: v2bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFH-NEXT: vxor.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v2bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v2bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
-; ZVFBFA-NEXT: vfneg.v v8, v8
-; ZVFBFA-NEXT: ret
- %vb = fneg <2 x bfloat> %va
- ret <2 x bfloat> %vb
-}
-
-define <4 x bfloat> @v4bf16(<4 x bfloat> %va) {
-; ZVFH-LABEL: v4bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT: vxor.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v4bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v4bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
-; ZVFBFA-NEXT: vfneg.v v8, v8
-; ZVFBFA-NEXT: ret
- %vb = fneg <4 x bfloat> %va
- ret <4 x bfloat> %vb
-}
-
-define <8 x bfloat> @v8bf16(<8 x bfloat> %va) {
-; ZVFH-LABEL: v8bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT: vxor.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v8bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v8bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
-; ZVFBFA-NEXT: vfneg.v v8, v8
-; ZVFBFA-NEXT: ret
- %vb = fneg <8 x bfloat> %va
- ret <8 x bfloat> %vb
-}
-
-define <16 x bfloat> @v16bf16(<16 x bfloat> %va) {
-; ZVFH-LABEL: v16bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: lui a0, 8
-; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT: vxor.vx v8, v8, a0
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v16bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v16bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
-; ZVFBFA-NEXT: vfneg.v v8, v8
-; ZVFBFA-NEXT: ret
- %vb = fneg <16 x bfloat> %va
- ret <16 x bfloat> %vb
-}
-
-define <32 x bfloat> @v32bf16(<32 x bfloat> %va) {
-; ZVFH-LABEL: v32bf16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: li a0, 32
-; ZVFH-NEXT: lui a1, 8
-; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vxor.vx v8, v8, a1
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: v32bf16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: li a0, 32
-; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: v32bf16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: li a0, 32
-; ZVFBFA-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
-; ZVFBFA-NEXT: vfneg.v v8, v8
-; ZVFBFA-NEXT: ret
- %vb = fneg <32 x bfloat> %va
- ret <32 x bfloat> %vb
-}
-
-define <1 x half> @vfneg_vv_v1f16(<1 x half> %va) {
-; ZVFH-LABEL: vfneg_vv_v1f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfneg_vv_v1f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfneg_vv_v1f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; ZVFBFA-NEXT: vxor.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %vb = fneg <1 x half> %va
- ret <1 x half> %vb
-}
-
-define <2 x half> @vfneg_vv_v2f16(<2 x half> %va) {
-; ZVFH-LABEL: vfneg_vv_v2f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfneg_vv_v2f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfneg_vv_v2f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; ZVFBFA-NEXT: vxor.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %vb = fneg <2 x half> %va
- ret <2 x half> %vb
-}
-
-define <4 x half> @vfneg_vv_v4f16(<4 x half> %va) {
-; ZVFH-LABEL: vfneg_vv_v4f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfneg_vv_v4f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfneg_vv_v4f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVFBFA-NEXT: vxor.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %vb = fneg <4 x half> %va
- ret <4 x half> %vb
-}
-
-define <8 x half> @vfneg_vv_v8f16(<8 x half> %va) {
-; ZVFH-LABEL: vfneg_vv_v8f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfneg_vv_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfneg_vv_v8f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFBFA-NEXT: vxor.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %vb = fneg <8 x half> %va
- ret <8 x half> %vb
-}
-
-define <16 x half> @vfneg_vv_v16f16(<16 x half> %va) {
-; ZVFH-LABEL: vfneg_vv_v16f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfneg_vv_v16f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: lui a0, 8
-; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a0
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfneg_vv_v16f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: lui a0, 8
-; ZVFBFA-NEXT: vsetivli zero, 16, e16, m2, ta, ma
-; ZVFBFA-NEXT: vxor.vx v8, v8, a0
-; ZVFBFA-NEXT: ret
- %vb = fneg <16 x half> %va
- ret <16 x half> %vb
-}
-
-define <32 x half> @vfneg_vv_v32f16(<32 x half> %va) {
-; ZVFH-LABEL: vfneg_vv_v32f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: li a0, 32
-; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: vfneg_vv_v32f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: li a0, 32
-; ZVFHMIN-NEXT: lui a1, 8
-; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vxor.vx v8, v8, a1
-; ZVFHMIN-NEXT: ret
-;
-; ZVFBFA-LABEL: vfneg_vv_v32f16:
-; ZVFBFA: # %bb.0:
-; ZVFBFA-NEXT: li a0, 32
-; ZVFBFA-NEXT: lui a1, 8
-; ZVFBFA-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; ZVFBFA-NEXT: vxor.vx v8, v8, a1
-; ZVFBFA-NEXT: ret
- %vb = fneg <32 x half> %va
- ret <32 x half> %vb
-}
-
-define <1 x float> @vfneg_vv_v1f32(<1 x float> %va) {
-; CHECK-LABEL: vfneg_vv_v1f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
- %vb = fneg <1 x float> %va
- ret <1 x float> %vb
-}
-
-define <2 x float> @vfneg_vv_v2f32(<2 x float> %va) {
-; CHECK-LABEL: vfneg_vv_v2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
- %vb = fneg <2 x float> %va
- ret <2 x float> %vb
-}
-
-define <4 x float> @vfneg_vv_v4f32(<4 x float> %va) {
-; CHECK-LABEL: vfneg_vv_v4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: ret
- %vb = fneg <4 x float> %va
- ret <4 x float> %vb
-}
-
-define <8 x float> @vfneg_vv_v8f32(<8 x float> %va) {
-; CHECK-LABEL: vfneg_vv_v8f32:
+; CHECK-LABEL: v1bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 1, e16alt, mf4, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
- %vb = fneg <8 x float> %va
- ret <8 x float> %vb
+ %vb = fneg <1 x bfloat> %va
+ ret <1 x bfloat> %vb
}
-define <16 x float> @vfneg_vv_v16f32(<16 x float> %va) {
-; CHECK-LABEL: vfneg_vv_v16f32:
+define <2 x bfloat> @v2bf16(<2 x bfloat> %va) {
+; CHECK-LABEL: v2bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 2, e16alt, mf4, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
- %vb = fneg <16 x float> %va
- ret <16 x float> %vb
+ %vb = fneg <2 x bfloat> %va
+ ret <2 x bfloat> %vb
}
-define <1 x double> @vfneg_vv_v1f64(<1 x double> %va) {
-; CHECK-LABEL: vfneg_vv_v1f64:
+define <4 x bfloat> @v4bf16(<4 x bfloat> %va) {
+; CHECK-LABEL: v4bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e16alt, mf2, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
- %vb = fneg <1 x double> %va
- ret <1 x double> %vb
+ %vb = fneg <4 x bfloat> %va
+ ret <4 x bfloat> %vb
}
-define <2 x double> @vfneg_vv_v2f64(<2 x double> %va) {
-; CHECK-LABEL: vfneg_vv_v2f64:
+define <8 x bfloat> @v8bf16(<8 x bfloat> %va) {
+; CHECK-LABEL: v8bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e16alt, m1, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
- %vb = fneg <2 x double> %va
- ret <2 x double> %vb
+ %vb = fneg <8 x bfloat> %va
+ ret <8 x bfloat> %vb
}
-define <4 x double> @vfneg_vv_v4f64(<4 x double> %va) {
-; CHECK-LABEL: vfneg_vv_v4f64:
+define <16 x bfloat> @v16bf16(<16 x bfloat> %va) {
+; CHECK-LABEL: v16bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 16, e16alt, m2, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
- %vb = fneg <4 x double> %va
- ret <4 x double> %vb
+ %vb = fneg <16 x bfloat> %va
+ ret <16 x bfloat> %vb
}
-define <8 x double> @vfneg_vv_v8f64(<8 x double> %va) {
-; CHECK-LABEL: vfneg_vv_v8f64:
+define <32 x bfloat> @v32bf16(<32 x bfloat> %va) {
+; CHECK-LABEL: v32bf16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
; CHECK-NEXT: vfneg.v v8, v8
; CHECK-NEXT: ret
- %vb = fneg <8 x double> %va
- ret <8 x double> %vb
+ %vb = fneg <32 x bfloat> %va
+ ret <32 x bfloat> %vb
}
>From 16feae16129cd4785ec5e41f51c81bd6bd29a80e Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Sun, 9 Nov 2025 06:38:08 -0800
Subject: [PATCH 3/3] fixup! use is_contained, remove declare for vp
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 ++----
.../CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll | 8 --------
.../test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll | 8 --------
.../test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll | 8 --------
llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll | 12 ------------
llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll | 12 ------------
llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll | 12 ------------
7 files changed, 2 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 155449018af65..a3ccbd8d4a8aa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7318,10 +7318,8 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
(Op.getValueType() == MVT::nxv32bf16 &&
Subtarget.hasVInstructionsBF16Minimal() &&
(!Subtarget.hasVInstructionsBF16() ||
- (std::find(std::begin(ZvfbfaOps), std::end(ZvfbfaOps),
- Op.getOpcode()) == std::end(ZvfbfaOps) &&
- std::find(std::begin(ZvfbfaVPOps), std::end(ZvfbfaVPOps),
- Op.getOpcode()) == std::end(ZvfbfaVPOps))));
+ (!llvm::is_contained(ZvfbfaOps, Op.getOpcode()) &&
+ !llvm::is_contained(ZvfbfaVPOps, Op.getOpcode()))));
}
static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
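
A note on the helper used in the hunk above: llvm::is_contained (from llvm/ADT/STLExtras.h) is a range-based membership test, so the two std::find/std::end comparisons collapse into one readable call per opcode table. The standalone sketch below only illustrates the idiom; the table contents, function name, and printed values are placeholders for this example and are not taken from the patch.

  // Minimal sketch of the llvm::is_contained idiom (placeholder data, not patch code).
  #include "llvm/ADT/STLExtras.h"
  #include <cstdio>

  static const unsigned DemoOps[] = {10, 20, 30};    // stands in for ZvfbfaOps
  static const unsigned DemoVPOps[] = {40, 50, 60};  // stands in for ZvfbfaVPOps

  // Mirrors the shape of the rewritten condition: true only when Opcode
  // appears in neither table.
  static bool inNeitherTable(unsigned Opcode) {
    return !llvm::is_contained(DemoOps, Opcode) &&
           !llvm::is_contained(DemoVPOps, Opcode);
  }

  int main() {
    std::printf("%d %d\n", inNeitherTable(20), inNeitherTable(42)); // prints "0 1"
    return 0;
  }

llvm::is_contained accepts any range with begin()/end(), including plain C arrays such as ZvfbfaOps, which is why the cleanup can drop the explicit std::begin/std::end bookkeeping.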
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
index 967ca092fe3c1..2455d872ae7f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -8,8 +8,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
-declare <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x i1>, i32)
-
define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_v2bf16:
; ZVFH: # %bb.0:
@@ -50,8 +48,6 @@ define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %v
ret <2 x bfloat> %v
}
-declare <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat>, <4 x bfloat>, <4 x i1>, i32)
-
define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_v4bf16:
; ZVFH: # %bb.0:
@@ -92,8 +88,6 @@ define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %v
ret <4 x bfloat> %v
}
-declare <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat>, <8 x bfloat>, <8 x i1>, i32)
-
define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_v8bf16:
; ZVFH: # %bb.0:
@@ -134,8 +128,6 @@ define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %v
ret <8 x bfloat> %v
}
-declare <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat>, <16 x bfloat>, <16 x i1>, i32)
-
define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_v16bf16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
index 8b76236f3491d..01bd706ed31f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll
@@ -12,8 +12,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
-declare <2 x bfloat> @llvm.vp.fabs.v2bf16(<2 x bfloat>, <2 x i1>, i32)
-
define <2 x bfloat> @vfabs_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_v2bf16:
; ZVFH: # %bb.0:
@@ -66,8 +64,6 @@ define <2 x bfloat> @vfabs_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl
ret <2 x bfloat> %v
}
-declare <4 x bfloat> @llvm.vp.fabs.v4bf16(<4 x bfloat>, <4 x i1>, i32)
-
define <4 x bfloat> @vfabs_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_v4bf16:
; ZVFH: # %bb.0:
@@ -120,8 +116,6 @@ define <4 x bfloat> @vfabs_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl
ret <4 x bfloat> %v
}
-declare <8 x bfloat> @llvm.vp.fabs.v8bf16(<8 x bfloat>, <8 x i1>, i32)
-
define <8 x bfloat> @vfabs_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_v8bf16:
; ZVFH: # %bb.0:
@@ -174,8 +168,6 @@ define <8 x bfloat> @vfabs_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl
ret <8 x bfloat> %v
}
-declare <16 x bfloat> @llvm.vp.fabs.v16bf16(<16 x bfloat>, <16 x i1>, i32)
-
define <16 x bfloat> @vfabs_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_v16bf16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
index 4bfebc29837d8..dede0e707d929 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll
@@ -12,8 +12,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
-declare <2 x bfloat> @llvm.vp.fneg.v2bf16(<2 x bfloat>, <2 x i1>, i32)
-
define <2 x bfloat> @vfneg_vv_v2bf16(<2 x bfloat> %va, <2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_v2bf16:
; ZVFH: # %bb.0:
@@ -62,8 +60,6 @@ define <2 x bfloat> @vfneg_vv_v2bf16_unmasked(<2 x bfloat> %va, i32 zeroext %evl
ret <2 x bfloat> %v
}
-declare <4 x bfloat> @llvm.vp.fneg.v4bf16(<4 x bfloat>, <4 x i1>, i32)
-
define <4 x bfloat> @vfneg_vv_v4bf16(<4 x bfloat> %va, <4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_v4bf16:
; ZVFH: # %bb.0:
@@ -112,8 +108,6 @@ define <4 x bfloat> @vfneg_vv_v4bf16_unmasked(<4 x bfloat> %va, i32 zeroext %evl
ret <4 x bfloat> %v
}
-declare <8 x bfloat> @llvm.vp.fneg.v8bf16(<8 x bfloat>, <8 x i1>, i32)
-
define <8 x bfloat> @vfneg_vv_v8bf16(<8 x bfloat> %va, <8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_v8bf16:
; ZVFH: # %bb.0:
@@ -162,8 +156,6 @@ define <8 x bfloat> @vfneg_vv_v8bf16_unmasked(<8 x bfloat> %va, i32 zeroext %evl
ret <8 x bfloat> %v
}
-declare <16 x bfloat> @llvm.vp.fneg.v16bf16(<16 x bfloat>, <16 x i1>, i32)
-
define <16 x bfloat> @vfneg_vv_v16bf16(<16 x bfloat> %va, <16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_v16bf16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
index 9b3d73c7acc0a..2f5fde3bb3b20 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll
@@ -12,8 +12,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
-declare <vscale x 1 x bfloat> @llvm.vp.copysign.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
-
define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x bfloat> %vb, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_nxv1bf16:
; ZVFH: # %bb.0:
@@ -74,8 +72,6 @@ define <vscale x 1 x bfloat> @vfsgnj_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat>
ret <vscale x 1 x bfloat> %v
}
-declare <vscale x 2 x bfloat> @llvm.vp.copysign.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
-
define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x bfloat> %vb, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_nxv2bf16:
; ZVFH: # %bb.0:
@@ -136,8 +132,6 @@ define <vscale x 2 x bfloat> @vfsgnj_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat>
ret <vscale x 2 x bfloat> %v
}
-declare <vscale x 4 x bfloat> @llvm.vp.copysign.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
-
define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x bfloat> %vb, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_nxv4bf16:
; ZVFH: # %bb.0:
@@ -198,8 +192,6 @@ define <vscale x 4 x bfloat> @vfsgnj_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat>
ret <vscale x 4 x bfloat> %v
}
-declare <vscale x 8 x bfloat> @llvm.vp.copysign.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
-
define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x bfloat> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_nxv8bf16:
; ZVFH: # %bb.0:
@@ -260,8 +252,6 @@ define <vscale x 8 x bfloat> @vfsgnj_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat>
ret <vscale x 8 x bfloat> %v
}
-declare <vscale x 16 x bfloat> @llvm.vp.copysign.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
-
define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x bfloat> %vb, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_nxv16bf16:
; ZVFH: # %bb.0:
@@ -322,8 +312,6 @@ define <vscale x 16 x bfloat> @vfsgnj_vv_nxv16bf16_unmasked(<vscale x 16 x bfloa
ret <vscale x 16 x bfloat> %v
}
-declare <vscale x 32 x bfloat> @llvm.vp.copysign.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
-
define <vscale x 32 x bfloat> @vfsgnj_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x bfloat> %vb, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfsgnj_vv_nxv32bf16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
index 697367e9b432d..c6888c0bcae0f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
@@ -12,8 +12,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v,+experimental-zvfbfa -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
-declare <vscale x 1 x bfloat> @llvm.vp.fabs.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
-
define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_nxv1bf16:
; ZVFH: # %bb.0:
@@ -66,8 +64,6 @@ define <vscale x 1 x bfloat> @vfabs_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
ret <vscale x 1 x bfloat> %v
}
-declare <vscale x 2 x bfloat> @llvm.vp.fabs.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
-
define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_nxv2bf16:
; ZVFH: # %bb.0:
@@ -120,8 +116,6 @@ define <vscale x 2 x bfloat> @vfabs_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
ret <vscale x 2 x bfloat> %v
}
-declare <vscale x 4 x bfloat> @llvm.vp.fabs.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
-
define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_nxv4bf16:
; ZVFH: # %bb.0:
@@ -174,8 +168,6 @@ define <vscale x 4 x bfloat> @vfabs_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
ret <vscale x 4 x bfloat> %v
}
-declare <vscale x 8 x bfloat> @llvm.vp.fabs.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
-
define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_nxv8bf16:
; ZVFH: # %bb.0:
@@ -228,8 +220,6 @@ define <vscale x 8 x bfloat> @vfabs_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
ret <vscale x 8 x bfloat> %v
}
-declare <vscale x 16 x bfloat> @llvm.vp.fabs.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
-
define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_nxv16bf16:
; ZVFH: # %bb.0:
@@ -282,8 +272,6 @@ define <vscale x 16 x bfloat> @vfabs_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
ret <vscale x 16 x bfloat> %v
}
-declare <vscale x 32 x bfloat> @llvm.vp.fabs.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
-
define <vscale x 32 x bfloat> @vfabs_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfabs_vv_nxv32bf16:
; ZVFH: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
index 80bb0c70cde29..9bd24c44b1b90 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll
@@ -12,8 +12,6 @@
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+experimental-zvfbfa,+v -target-abi=lp64d \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFBFA
-declare <vscale x 1 x bfloat> @llvm.vp.fneg.nxv1bf16(<vscale x 1 x bfloat>, <vscale x 1 x i1>, i32)
-
define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16(<vscale x 1 x bfloat> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_nxv1bf16:
; ZVFH: # %bb.0:
@@ -62,8 +60,6 @@ define <vscale x 1 x bfloat> @vfneg_vv_nxv1bf16_unmasked(<vscale x 1 x bfloat> %
ret <vscale x 1 x bfloat> %v
}
-declare <vscale x 2 x bfloat> @llvm.vp.fneg.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x i1>, i32)
-
define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16(<vscale x 2 x bfloat> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_nxv2bf16:
; ZVFH: # %bb.0:
@@ -112,8 +108,6 @@ define <vscale x 2 x bfloat> @vfneg_vv_nxv2bf16_unmasked(<vscale x 2 x bfloat> %
ret <vscale x 2 x bfloat> %v
}
-declare <vscale x 4 x bfloat> @llvm.vp.fneg.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x i1>, i32)
-
define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16(<vscale x 4 x bfloat> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_nxv4bf16:
; ZVFH: # %bb.0:
@@ -162,8 +156,6 @@ define <vscale x 4 x bfloat> @vfneg_vv_nxv4bf16_unmasked(<vscale x 4 x bfloat> %
ret <vscale x 4 x bfloat> %v
}
-declare <vscale x 8 x bfloat> @llvm.vp.fneg.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, i32)
-
define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16(<vscale x 8 x bfloat> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_nxv8bf16:
; ZVFH: # %bb.0:
@@ -212,8 +204,6 @@ define <vscale x 8 x bfloat> @vfneg_vv_nxv8bf16_unmasked(<vscale x 8 x bfloat> %
ret <vscale x 8 x bfloat> %v
}
-declare <vscale x 16 x bfloat> @llvm.vp.fneg.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x i1>, i32)
-
define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16(<vscale x 16 x bfloat> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_nxv16bf16:
; ZVFH: # %bb.0:
@@ -262,8 +252,6 @@ define <vscale x 16 x bfloat> @vfneg_vv_nxv16bf16_unmasked(<vscale x 16 x bfloat
ret <vscale x 16 x bfloat> %v
}
-declare <vscale x 32 x bfloat> @llvm.vp.fneg.nxv32bf16(<vscale x 32 x bfloat>, <vscale x 32 x i1>, i32)
-
define <vscale x 32 x bfloat> @vfneg_vv_nxv32bf16(<vscale x 32 x bfloat> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; ZVFH-LABEL: vfneg_vv_nxv32bf16:
; ZVFH: # %bb.0: