[llvm] [RISCV] Promote fixed-length bf16 arith vector ops with zvfbfmin (PR #112393)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 15 09:25:39 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
The aim is to have the same set of promotions on fixed-length bf16 vectors as on fixed-length f16 vectors, and then deduplicate them similarly to what was done for scalable vectors.
It looks like fneg/fabs/fcopysign end up getting expanded because fsub is now legal, and their default operation action is presumably Expand.
---
Patch is 81.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/112393.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+7)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+1648-83)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index cde690793f0702..ae61b03a4aa3b6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1380,6 +1380,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
{ISD::VP_MERGE, ISD::VP_SELECT, ISD::VSELECT, ISD::SELECT}, VT,
Custom);
// TODO: Promote to fp32.
+ MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ // Don't promote f16 vector operations to f32 if f32 vector type is
+ // not legal.
+ // TODO: could split the f16 vector into two vectors and do promotion.
+ if (!isTypeLegal(F32VecVT))
+ continue;
+ setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
continue;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 7ecf8af54c8dc0..c24ade1e6d8eff 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -1,8 +1,52 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
+
+
+define void @fadd_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fadd_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = load <8 x bfloat>, ptr %y
+ %c = fadd <8 x bfloat> %a, %b
+ store <8 x bfloat> %c, ptr %x
+ ret void
+}
+
+define void @fadd_v6bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fadd_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v12, v10
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = load <6 x bfloat>, ptr %y
+ %c = fadd <6 x bfloat> %a, %b
+ store <6 x bfloat> %c, ptr %x
+ ret void
+}
define void @fadd_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fadd_v8f16:
@@ -97,6 +141,49 @@ define void @fadd_v2f64(ptr %x, ptr %y) {
ret void
}
+define void @fsub_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fsub_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = load <8 x bfloat>, ptr %y
+ %c = fsub <8 x bfloat> %a, %b
+ store <8 x bfloat> %c, ptr %x
+ ret void
+}
+
+define void @fsub_v6bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fsub_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v12, v10
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = load <6 x bfloat>, ptr %y
+ %c = fsub <6 x bfloat> %a, %b
+ store <6 x bfloat> %c, ptr %x
+ ret void
+}
+
define void @fsub_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fsub_v8f16:
; ZVFH: # %bb.0:
@@ -190,6 +277,49 @@ define void @fsub_v2f64(ptr %x, ptr %y) {
ret void
}
+define void @fmul_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fmul_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = load <8 x bfloat>, ptr %y
+ %c = fmul <8 x bfloat> %a, %b
+ store <8 x bfloat> %c, ptr %x
+ ret void
+}
+
+define void @fmul_v6bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fmul_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v12, v10
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = load <6 x bfloat>, ptr %y
+ %c = fmul <6 x bfloat> %a, %b
+ store <6 x bfloat> %c, ptr %x
+ ret void
+}
+
define void @fmul_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fmul_v8f16:
; ZVFH: # %bb.0:
@@ -283,6 +413,49 @@ define void @fmul_v2f64(ptr %x, ptr %y) {
ret void
}
+define void @fdiv_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fdiv_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v8, v12, v10
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = load <8 x bfloat>, ptr %y
+ %c = fdiv <8 x bfloat> %a, %b
+ store <8 x bfloat> %c, ptr %x
+ ret void
+}
+
+define void @fdiv_v6bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: fdiv_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfdiv.vv v8, v12, v10
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = load <6 x bfloat>, ptr %y
+ %c = fdiv <6 x bfloat> %a, %b
+ store <6 x bfloat> %c, ptr %x
+ ret void
+}
+
define void @fdiv_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: fdiv_v8f16:
; ZVFH: # %bb.0:
@@ -376,6 +549,36 @@ define void @fdiv_v2f64(ptr %x, ptr %y) {
ret void
}
+define void @fneg_v8bf16(ptr %x) {
+; CHECK-LABEL: fneg_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: vxor.vx v8, v8, a1
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = fneg <8 x bfloat> %a
+ store <8 x bfloat> %b, ptr %x
+ ret void
+}
+
+define void @fneg_v6bf16(ptr %x) {
+; CHECK-LABEL: fneg_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: vxor.vx v8, v8, a1
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = fneg <6 x bfloat> %a
+ store <6 x bfloat> %b, ptr %x
+ ret void
+}
+
define void @fneg_v8f16(ptr %x) {
; ZVFH-LABEL: fneg_v8f16:
; ZVFH: # %bb.0:
@@ -450,6 +653,38 @@ define void @fneg_v2f64(ptr %x) {
ret void
}
+define void @fabs_v8bf16(ptr %x) {
+; CHECK-LABEL: fabs_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = call <8 x bfloat> @llvm.fabs.v8bf16(<8 x bfloat> %a)
+ store <8 x bfloat> %b, ptr %x
+ ret void
+}
+
+define void @fabs_v6bf16(ptr %x) {
+; CHECK-LABEL: fabs_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = call <6 x bfloat> @llvm.fabs.v6bf16(<6 x bfloat> %a)
+ store <6 x bfloat> %b, ptr %x
+ ret void
+}
+
define void @fabs_v8f16(ptr %x) {
; ZVFH-LABEL: fabs_v8f16:
; ZVFH: # %bb.0:
@@ -473,7 +708,6 @@ define void @fabs_v8f16(ptr %x) {
store <8 x half> %b, ptr %x
ret void
}
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
define void @fabs_v6f16(ptr %x) {
; ZVFH-LABEL: fabs_v6f16:
@@ -498,7 +732,6 @@ define void @fabs_v6f16(ptr %x) {
store <6 x half> %b, ptr %x
ret void
}
-declare <6 x half> @llvm.fabs.v6f16(<6 x half>)
define void @fabs_v4f32(ptr %x) {
; CHECK-LABEL: fabs_v4f32:
@@ -513,7 +746,6 @@ define void @fabs_v4f32(ptr %x) {
store <4 x float> %b, ptr %x
ret void
}
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
define void @fabs_v2f64(ptr %x) {
; CHECK-LABEL: fabs_v2f64:
@@ -528,7 +760,48 @@ define void @fabs_v2f64(ptr %x) {
store <2 x double> %b, ptr %x
ret void
}
-declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
+
+define void @copysign_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = load <8 x bfloat>, ptr %y
+ %c = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %b)
+ store <8 x bfloat> %c, ptr %x
+ ret void
+}
+
+define void @copysign_v6bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: addi a1, a1, -1
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = load <6 x bfloat>, ptr %y
+ %c = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %b)
+ store <6 x bfloat> %c, ptr %x
+ ret void
+}
define void @copysign_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v8f16:
@@ -558,7 +831,6 @@ define void @copysign_v8f16(ptr %x, ptr %y) {
store <8 x half> %c, ptr %x
ret void
}
-declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
define void @copysign_v6f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_v6f16:
@@ -590,7 +862,6 @@ define void @copysign_v6f16(ptr %x, ptr %y) {
store <6 x half> %c, ptr %x
ret void
}
-declare <6 x half> @llvm.copysign.v6f16(<6 x half>, <6 x half>)
define void @copysign_v4f32(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v4f32:
@@ -607,7 +878,6 @@ define void @copysign_v4f32(ptr %x, ptr %y) {
store <4 x float> %c, ptr %x
ret void
}
-declare <4 x float> @llvm.copysign.v4f32(<4 x float>, <4 x float>)
define void @copysign_v2f64(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_v2f64:
@@ -624,7 +894,52 @@ define void @copysign_v2f64(ptr %x, ptr %y) {
store <2 x double> %c, ptr %x
ret void
}
-declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>)
+
+define void @copysign_vf_v8bf16(ptr %x, bfloat %y) {
+; CHECK-LABEL: copysign_vf_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.w a1, fa0
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = insertelement <8 x bfloat> poison, bfloat %y, i32 0
+ %c = shufflevector <8 x bfloat> %b, <8 x bfloat> poison, <8 x i32> zeroinitializer
+ %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
+ store <8 x bfloat> %d, ptr %x
+ ret void
+}
+
+define void @copysign_vf_v6bf16(ptr %x, bfloat %y) {
+; CHECK-LABEL: copysign_vf_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.w a1, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = insertelement <6 x bfloat> poison, bfloat %y, i32 0
+ %c = shufflevector <6 x bfloat> %b, <6 x bfloat> poison, <6 x i32> zeroinitializer
+ %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
+ store <6 x bfloat> %d, ptr %x
+ ret void
+}
define void @copysign_vf_v8f16(ptr %x, half %y) {
; ZVFH-LABEL: copysign_vf_v8f16:
@@ -720,6 +1035,52 @@ define void @copysign_vf_v2f64(ptr %x, double %y) {
ret void
}
+define void @copysign_neg_v8bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_neg_v8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: vxor.vx v8, v8, a1
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: vand.vx v9, v9, a2
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <8 x bfloat>, ptr %x
+ %b = load <8 x bfloat>, ptr %y
+ %c = fneg <8 x bfloat> %b
+ %d = call <8 x bfloat> @llvm.copysign.v8bf16(<8 x bfloat> %a, <8 x bfloat> %c)
+ store <8 x bfloat> %d, ptr %x
+ ret void
+}
+
+define void @copysign_neg_v6bf16(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_neg_v6bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a1
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: vand.vx v9, v9, a2
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vor.vv v8, v9, v8
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x bfloat>, ptr %x
+ %b = load <6 x bfloat>, ptr %y
+ %c = fneg <6 x bfloat> %b
+ %d = call <6 x bfloat> @llvm.copysign.v6bf16(<6 x bfloat> %a, <6 x bfloat> %c)
+ store <6 x bfloat> %d, ptr %x
+ ret void
+}
+
define void @copysign_neg_v8f16(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_v8f16:
; ZVFH: # %bb.0:
@@ -818,6 +1179,56 @@ define void @copysign_neg_v2f64(ptr %x, ptr %y) {
ret void
}
+define void @copysign_neg_trunc_v4bf16_v4f32(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_neg_trunc_v4bf16_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9
+; CHECK-NEXT: vxor.vx v9, v10, a1
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <4 x bfloat>, ptr %x
+ %b = load <4 x float>, ptr %y
+ %c = fneg <4 x float> %b
+ %d = fptrunc <4 x float> %c to <4 x bfloat>
+ %e = call <4 x bfloat> @llvm.copysign.v4bf16(<4 x bfloat> %a, <4 x bfloat> %d)
+ store <4 x bfloat> %e, ptr %x
+ ret void
+}
+
+define void @copysign_neg_trunc_v3bf16_v3f32(ptr %x, ptr %y) {
+; CHECK-LABEL: copysign_neg_trunc_v3bf16_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle32.v v9, (a1)
+; CHECK-NEXT: lui a1, 8
+; CHECK-NEXT: addi a2, a1, -1
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a2
+; CHECK-NEXT: vfncvtbf16.f.f.w v10, v9
+; CHECK-NEXT: vxor.vx v9, v10, a1
+; CHECK-NEXT: vand.vx v9, v9, a1
+; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <3 x bfloat>, ptr %x
+ %b = load <3 x float>, ptr %y
+ %c = fneg <3 x float> %b
+ %d = fptrunc <3 x float> %c to <3 x bfloat>
+ %e = call <3 x bfloat> @llvm.copysign.v3bf16(<3 x bfloat> %a, <3 x bfloat> %d)
+ store <3 x bfloat> %e, ptr %x
+ ret void
+}
+
define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32:
; ZVFH: # %bb.0:
@@ -851,7 +1262,6 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
store <4 x half> %e, ptr %x
ret void
}
-declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32:
@@ -890,7 +1300,6 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
store <3 x half> %e, ptr %x
ret void
}
-declare <3 x half> @llvm.copysign.v3f16(<3 x half>, <3 x half>)
define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) {
; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32:
@@ -912,6 +1321,43 @@ define void @copysign_neg_ext_v2f64_v2f3...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/112393
More information about the llvm-commits
mailing list