[llvm] [llvm][RISCV] Support Zvfbfa codegen for fneg, fabs and copysign (PR #166944)

Fri Nov 7 06:39:13 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

Author: Brandon Wu (4vtomat)

<details>
<summary>Changes</summary>

This is first patch for Zvfbfa codegen and I'm going to break it down to
several patches to make it easier to reivew.
The codegen supports both scalable vector and fixed length vector on both
native operations and vp intrinsics.


---

Patch is 204.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/166944.diff


14 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+79-2) 
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td (+82-1) 
- (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll (+56) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll (+180-8) 
- (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-sdnode.ll (+457) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll (+292-8) 
- (added) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-sdnode.ll (+403) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll (+268-8) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vcopysign-vp.ll (+500-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll (+180-42) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll (+432-8) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll (+648-54) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll (+162-36) 
- (modified) llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll (+396-8) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1977d3372c5f6..4bb83d1f600fb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,12 @@ static cl::opt<bool>
                                "be combined with a shift"),
                       cl::init(true));
 
+// TODO: Support more ops
+static const unsigned ZvfbfaVPOps[] = {
+    ISD::VP_FNEG, ISD::VP_FABS, ISD::VP_FCOPYSIGN};
+static const unsigned ZvfbfaOps[] = {
+    ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN};
+
 RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                          const RISCVSubtarget &STI)
     : TargetLowering(TM), Subtarget(STI) {
@@ -1208,6 +1214,61 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       }
     };
 
+    // Sets common actions for zvfbfa, some of instructions are supported
+    // natively so that we don't need to promote them.
+    const auto SetZvfbfaActions = [&](MVT VT) {
+      setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
+      setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
+                         Custom);
+      setOperationAction({ISD::VP_FP_ROUND, ISD::VP_FP_EXTEND}, VT, Custom);
+      setOperationAction({ISD::LRINT, ISD::LLRINT}, VT, Custom);
+      setOperationAction({ISD::LROUND, ISD::LLROUND}, VT, Custom);
+      setOperationAction({ISD::VP_MERGE, ISD::VP_SELECT, ISD::SELECT}, VT,
+                         Custom);
+      setOperationAction(ISD::SELECT_CC, VT, Expand);
+      setOperationAction({ISD::VP_SINT_TO_FP, ISD::VP_UINT_TO_FP}, VT, Custom);
+      setOperationAction({ISD::INSERT_VECTOR_ELT, ISD::CONCAT_VECTORS,
+                          ISD::INSERT_SUBVECTOR, ISD::EXTRACT_SUBVECTOR,
+                          ISD::VECTOR_DEINTERLEAVE, ISD::VECTOR_INTERLEAVE,
+                          ISD::VECTOR_REVERSE, ISD::VECTOR_SPLICE,
+                          ISD::VECTOR_COMPRESS},
+                         VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_SPLICE, VT, Custom);
+      setOperationAction(ISD::EXPERIMENTAL_VP_REVERSE, VT, Custom);
+
+      setOperationAction(ISD::FCOPYSIGN, VT, Legal);
+      setOperationAction(ZvfbfaVPOps, VT, Custom);
+
+      MVT EltVT = VT.getVectorElementType();
+      if (isTypeLegal(EltVT))
+        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT,
+                            ISD::EXTRACT_VECTOR_ELT},
+                           VT, Custom);
+      else
+        setOperationAction({ISD::SPLAT_VECTOR, ISD::EXPERIMENTAL_VP_SPLAT},
+                           EltVT, Custom);
+      setOperationAction({ISD::LOAD, ISD::STORE, ISD::MLOAD, ISD::MSTORE,
+                          ISD::MGATHER, ISD::MSCATTER, ISD::VP_LOAD,
+                          ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
+                          ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER,
+                          ISD::VP_SCATTER},
+                         VT, Custom);
+      setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
+
+      // Expand FP operations that need libcalls.
+      setOperationAction(FloatingPointLibCallOps, VT, Expand);
+
+      // Custom split nxv32[b]f16 since nxv32[b]f32 is not legal.
+      if (getLMUL(VT) == RISCVVType::LMUL_8) {
+        setOperationAction(ZvfhminZvfbfminPromoteOps, VT, Custom);
+        setOperationAction(ZvfhminZvfbfminPromoteVPOps, VT, Custom);
+      } else {
+        MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+        setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
+        setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
+      }
+    };
+
     if (Subtarget.hasVInstructionsF16()) {
       for (MVT VT : F16VecVTs) {
         if (!isTypeLegal(VT))
@@ -1222,7 +1283,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
       }
     }
 
-    if (Subtarget.hasVInstructionsBF16Minimal()) {
+    if (Subtarget.hasVInstructionsBF16()) {
+      for (MVT VT : BF16VecVTs) {
+        if (!isTypeLegal(VT))
+          continue;
+        SetZvfbfaActions(VT);
+      }
+    } else if (Subtarget.hasVInstructionsBF16Minimal()) {
       for (MVT VT : BF16VecVTs) {
         if (!isTypeLegal(VT))
           continue;
@@ -1501,6 +1568,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
             // available.
             setOperationAction(ISD::BUILD_VECTOR, MVT::bf16, Custom);
           }
+          if (Subtarget.hasStdExtZvfbfa()) {
+            setOperationAction(ZvfbfaOps, VT, Custom);
+            setOperationAction(ZvfbfaVPOps, VT, Custom);
+          }
           setOperationAction(
               {ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
               Custom);
@@ -7245,7 +7316,13 @@ static bool isPromotedOpNeedingSplit(SDValue Op,
   return (Op.getValueType() == MVT::nxv32f16 &&
           (Subtarget.hasVInstructionsF16Minimal() &&
            !Subtarget.hasVInstructionsF16())) ||
-         Op.getValueType() == MVT::nxv32bf16;
+         (Op.getValueType() == MVT::nxv32bf16 &&
+          Subtarget.hasVInstructionsBF16Minimal() &&
+          (!Subtarget.hasVInstructionsBF16() ||
+           (std::find(std::begin(ZvfbfaOps), std::end(ZvfbfaOps),
+                      Op.getOpcode()) == std::end(ZvfbfaOps) &&
+            std::find(std::begin(ZvfbfaVPOps), std::end(ZvfbfaVPOps),
+                      Op.getOpcode()) == std::end(ZvfbfaVPOps))));
 }
 
 static SDValue SplitVectorOp(SDValue Op, SelectionDAG &DAG) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
index b9c5b75983b1f..49f1c92750f3a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td
@@ -701,5 +701,86 @@ let Predicates = [HasStdExtZvfbfa] in {
                   FRM_DYN,
                   fvti.AVL, fvti.Log2SEW, TA_MA)>;
   }
-}
+
+  foreach vti = AllBF16Vectors in {
+    // 13.12. Vector Floating-Point Sign-Injection Instructions
+    def : Pat<(fabs (vti.Vector vti.RegClass:$rs)),
+              (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                   (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+    // Handle fneg with VFSGNJN using the same input for both operands.
+    def : Pat<(fneg (vti.Vector vti.RegClass:$rs)),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                   (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector vti.RegClass:$rs2))),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                   (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+                   (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (fneg vti.RegClass:$rs2)))),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+                   (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+    def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1),
+                                     (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_E"#vti.SEW)
+                   (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TA_MA)>;
+
+    // 13.12. Vector Floating-Point Sign-Injection Instructions
+    def : Pat<(riscv_fabs_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+                             VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJX_ALT_VV_"# vti.LMul.MX #"_E"#vti.SEW#"_MASK")
+                   (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+                   vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                   TA_MA)>;
+    // Handle fneg with VFSGNJN using the same input for both operands.
+    def : Pat<(riscv_fneg_vl (vti.Vector vti.RegClass:$rs), (vti.Mask VMV0:$vm),
+                             VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW #"_MASK")
+                   (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs,
+                   vti.RegClass:$rs, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                   TA_MA)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (vti.Vector vti.RegClass:$rs2),
+                                  vti.RegClass:$passthru,
+                                  (vti.Mask VMV0:$vm),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                   vti.RegClass:$passthru, vti.RegClass:$rs1,
+                   vti.RegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                   TAIL_AGNOSTIC)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (riscv_fneg_vl vti.RegClass:$rs2,
+                                                 (vti.Mask true_mask),
+                                                 VLOpFrag),
+                                  srcvalue,
+                                  (vti.Mask true_mask),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJN_ALT_VV_"# vti.LMul.MX#"_E"#vti.SEW)
+        (vti.Vector (IMPLICIT_DEF)),
+                   vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TA_MA)>;
+
+    def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1),
+                                  (SplatFPOp vti.ScalarRegClass:$rs2),
+                                  vti.RegClass:$passthru,
+                                  (vti.Mask VMV0:$vm),
+                                  VLOpFrag),
+              (!cast<Instruction>("PseudoVFSGNJ_ALT_V"#vti.ScalarSuffix#"_"# vti.LMul.MX#"_E"#vti.SEW#"_MASK")
+                   vti.RegClass:$passthru, vti.RegClass:$rs1,
+                   vti.ScalarRegClass:$rs2, (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW,
+                   TAIL_AGNOSTIC)>;
+    }
+  }
 } // Predicates = [HasStdExtZvfbfa]
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
new file mode 100644
index 0000000000000..9cfed6a659c64
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-sdnode.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfa \
+; RUN:     -target-abi=ilp32d -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfa \
+; RUN:     -target-abi=lp64d -verify-machineinstrs < %s | FileCheck %s
+
+define <2 x bfloat> @copysign_v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 2, e16alt, mf4, ta, ma
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %r = call <2 x bfloat> @llvm.copysign.v2bf16(<2 x bfloat> %vm, <2 x bfloat> %vs)
+  ret <2 x bfloat> %r
+}
+
+define <4 x bfloat> @copysign_v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e16alt, mf2, ta, ma
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %r = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %vm, <4 x bfloat> %vs)
+  ret <4 x bfloat> %r
+}
+
+define <8 x bfloat> @copysign_v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e16alt, m1, ta, ma
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v9
+; CHECK-NEXT:    ret
+  %r = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %vm, <8 x bfloat> %vs)
+  ret <8 x bfloat> %r
+}
+
+define <16 x bfloat> @copysign_v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 16, e16alt, m2, ta, ma
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v10
+; CHECK-NEXT:    ret
+  %r = call <16 x bfloat> @llvm.copysign.v16bf16(<16 x bfloat> %vm, <16 x bfloat> %vs)
+  ret <16 x bfloat> %r
+}
+
+define <32 x bfloat> @copysign_v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs) {
+; CHECK-LABEL: copysign_v32bf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    li a0, 32
+; CHECK-NEXT:    vsetvli zero, a0, e16alt, m4, ta, ma
+; CHECK-NEXT:    vfsgnj.vv v8, v8, v12
+; CHECK-NEXT:    ret
+  %r = call <32 x bfloat> @llvm.copysign.v32bf32(<32 x bfloat> %vm, <32 x bfloat> %vs)
+  ret <32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
index a2178e1c571da..967ca092fe3c1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll
@@ -1,8 +1,180 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+v -target-abi=ilp32d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+v -target-abi=lp64d \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+zvfbfmin,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFH %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=ilp32d \
+; RUN:   -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zvfh,+experimental-zvfbfa,+v -target-abi=lp64d \
+; RUN:   -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,ZVFBFA %s
+
+declare <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat>, <2 x bfloat>, <2 x i1>, i32)
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT:    vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl)
+  ret <2 x bfloat> %v
+}
+
+define <2 x bfloat> @vfsgnj_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf4, ta, ma
+; ZVFH-NEXT:    vand.vx v9, v9, a1
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1
+; ZVFH-NEXT:    vor.vv v8, v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v2bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf4, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <2 x bfloat> @llvm.vp.copysign.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl)
+  ret <2 x bfloat> %v
+}
+
+declare <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat>, <4 x bfloat>, <4 x i1>, i32)
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT:    vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl)
+  ret <4 x bfloat> %v
+}
+
+define <4 x bfloat> @vfsgnj_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, mf2, ta, ma
+; ZVFH-NEXT:    vand.vx v9, v9, a1
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1
+; ZVFH-NEXT:    vor.vv v8, v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v4bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, mf2, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <4 x bfloat> @llvm.vp.copysign.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl)
+  ret <4 x bfloat> %v
+}
+
+declare <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat>, <8 x bfloat>, <8 x i1>, i32)
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vand.vx v9, v9, a1, v0.t
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    vor.vv v8, v8, v9, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v9, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+define <8 x bfloat> @vfsgnj_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vand.vx v9, v9, a1
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1
+; ZVFH-NEXT:    vor.vv v8, v8, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v8bf16_unmasked:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, m1, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v9
+; ZVFBFA-NEXT:    ret
+  %v = call <8 x bfloat> @llvm.vp.copysign.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl)
+  ret <8 x bfloat> %v
+}
+
+declare <16 x bfloat> @llvm.vp.copysign.v16bf16(<16 x bfloat>, <16 x bfloat>, <16 x i1>, i32)
+
+define <16 x bfloat> @vfsgnj_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vfsgnj_vv_v16bf16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    lui a1, 8
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m2, ta, ma
+; ZVFH-NEXT:    vand.vx v10, v10, a1, v0.t
+; ZVFH-NEXT:    addi a1, a1, -1
+; ZVFH-NEXT:    vand.vx v8, v8, a1, v0.t
+; ZVFH-NEXT:    vor.vv v8, v8, v10, v0.t
+; ZVFH-NEXT:    ret
+;
+; ZVFBFA-LABEL: vfsgnj_vv_v16bf16:
+; ZVFBFA:       # %bb.0:
+; ZVFBFA-NEXT:    vsetvli zero, a0, e16alt, m2, ta, ma
+; ZVFBFA-NEXT:    vfsgnj.vv v8, v8, v10, v0.t
+; ZVFBFA-NEXT:    ret
+  %v = call <16 x ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/166944