[llvm] [RISCV] Expand bf16 FNEG/FABS/FCOPYSIGN (PR #108245)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 09:32:17 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
The motivation for this is to start promoting bf16 ops to f32 so that we can mark bf16 as a supported type in RISCVTTIImpl::isElementTypeLegalForScalableVector and scalably-vectorize it.
This starts with expanding the nodes that can't be promoted to f32 due to canonicalizing NaNs, similarly to f16 in #106652.
---
Full diff: https://github.com/llvm/llvm-project/pull/108245.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5)
- (added) llvm/test/CodeGen/RISCV/rvv/vfabs-bf16-sdnode.ll (+75)
- (added) llvm/test/CodeGen/RISCV/rvv/vfcopysign-bf16-sdnode.ll (+87)
- (added) llvm/test/CodeGen/RISCV/rvv/vfneg-bf16-sdnode.ll (+69)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4554163d4551dc..3675a872dbf40c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1118,6 +1118,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.hasStdExtZfbfmin())
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+
// TODO: Promote to fp32.
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-bf16-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-bf16-sdnode.ll
new file mode 100644
index 00000000000000..1c6a57d99dd0af
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-bf16-sdnode.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %v) {
+; CHECK-LABEL: nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %r = call <vscale x 1 x bfloat> @llvm.fabs.nxv1bf16(<vscale x 1 x bfloat> %v)
+ ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %v) {
+; CHECK-LABEL: nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %v)
+ ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %v) {
+; CHECK-LABEL: nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %v)
+ ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %v) {
+; CHECK-LABEL: nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %v)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %v) {
+; CHECK-LABEL: nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %r = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> %v)
+ ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %v) {
+; CHECK-LABEL: nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %r = call <vscale x 32 x bfloat> @llvm.fabs.nxv32bf16(<vscale x 32 x bfloat> %v)
+ ret <vscale x 32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-bf16-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-bf16-sdnode.ll
new file mode 100644
index 00000000000000..ee050baee1f791
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-bf16-sdnode.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs) {
+; CHECK-LABEL: nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs)
+ ret <vscale x 1 x bfloat> %r
+}
+
+define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs) {
+; CHECK-LABEL: nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs)
+ ret <vscale x 2 x bfloat> %r
+}
+
+define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs) {
+; CHECK-LABEL: nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vand.vx v9, v9, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs)
+ ret <vscale x 4 x bfloat> %r
+}
+
+define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs) {
+; CHECK-LABEL: nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vand.vx v10, v10, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vor.vv v8, v8, v10
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs)
+ ret <vscale x 8 x bfloat> %r
+}
+
+define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs) {
+; CHECK-LABEL: nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vand.vx v12, v12, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vor.vv v8, v8, v12
+; CHECK-NEXT: ret
+ %r = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs)
+ ret <vscale x 16 x bfloat> %r
+}
+
+define <vscale x 32 x bfloat> @nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs) {
+; CHECK-LABEL: nxv32bf32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vand.vx v16, v16, a0
+; CHECK-NEXT: addi a0, a0, -1
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vor.vv v8, v8, v16
+; CHECK-NEXT: ret
+ %r = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs)
+ ret <vscale x 32 x bfloat> %r
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-bf16-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-bf16-sdnode.ll
new file mode 100644
index 00000000000000..8fef7f60d99f64
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-bf16-sdnode.ll
@@ -0,0 +1,69 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %va) {
+; CHECK-LABEL: nxv1bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = fneg <vscale x 1 x bfloat> %va
+ ret <vscale x 1 x bfloat> %vb
+}
+
+define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %va) {
+; CHECK-LABEL: nxv2bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = fneg <vscale x 2 x bfloat> %va
+ ret <vscale x 2 x bfloat> %vb
+}
+
+define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %va) {
+; CHECK-LABEL: nxv4bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = fneg <vscale x 4 x bfloat> %va
+ ret <vscale x 4 x bfloat> %vb
+}
+
+define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %va) {
+; CHECK-LABEL: nxv8bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = fneg <vscale x 8 x bfloat> %va
+ ret <vscale x 8 x bfloat> %vb
+}
+
+define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %va) {
+; CHECK-LABEL: nxv16bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = fneg <vscale x 16 x bfloat> %va
+ ret <vscale x 16 x bfloat> %vb
+}
+
+define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %va) {
+; CHECK-LABEL: nxv32bf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lui a0, 8
+; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; CHECK-NEXT: vxor.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = fneg <vscale x 32 x bfloat> %va
+ ret <vscale x 32 x bfloat> %vb
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/108245
More information about the llvm-commits mailing list