[llvm] [RISCV] Expand bf16 vector truncstores and extloads (PR #108235)

Wed Sep 11 07:45:07 PDT 2024

https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/108235

Previously they were legal by default, so the truncstore/extload test cases would get combined and crash during selection.
These are set to expand for f16 so do the same for bf16.


>From 0a36f6f72bec63039b71c709df5d17d1155fa36e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 11 Sep 2024 22:38:25 +0800
Subject: [PATCH] [RISCV] Expand bf16 vector truncstores and extloads

Previously they were legal by default, so the truncstore/extload test cases would get combined and crash during selection.
These are set to expand for f16 so do the same for bf16.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  2 +
 llvm/test/CodeGen/RISCV/rvv/load-bf16.ll    | 71 +++++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/store-bf16.ll   | 71 +++++++++++++++++++++
 3 files changed, 144 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/load-bf16.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/store-bf16.ll

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 23f2b0e96495e9..f5d599e5127a76 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1128,6 +1128,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           continue;
         SetCommonVFPActions(VT);
         SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+        SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
       }
     }
 
@@ -1137,6 +1138,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           continue;
         SetCommonVFPActions(VT);
         SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
+        SetCommonVFPExtLoadTruncStoreActions(VT, BF16VecVTs);
         SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
       }
     }
diff --git a/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll
new file mode 100644
index 00000000000000..1108bb16b6712b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/load-bf16.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+
+define <vscale x 1 x bfloat> @load_nxv1bf16(ptr %p) {
+; CHECK-LABEL: load_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %x = load <vscale x 1 x bfloat>, ptr %p
+  ret <vscale x 1 x bfloat> %x
+}
+
+define <vscale x 2 x bfloat> @load_nxv2bf16(ptr %p) {
+; CHECK-LABEL: load_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %x = load <vscale x 2 x bfloat>, ptr %p
+  ret <vscale x 2 x bfloat> %x
+}
+
+define <vscale x 4 x bfloat> @load_nxv4bf16(ptr %p) {
+; CHECK-LABEL: load_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %x = load <vscale x 4 x bfloat>, ptr %p
+  ret <vscale x 4 x bfloat> %x
+}
+
+define <vscale x 8 x bfloat> @load_nxv8bf16(ptr %p) {
+; CHECK-LABEL: load_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl2re16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %x = load <vscale x 8 x bfloat>, ptr %p
+  ret <vscale x 8 x bfloat> %x
+}
+
+define <vscale x 16 x bfloat> @load_nxv16bf16(ptr %p) {
+; CHECK-LABEL: load_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl4re16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %x = load <vscale x 16 x bfloat>, ptr %p
+  ret <vscale x 16 x bfloat> %x
+}
+
+define <vscale x 32 x bfloat> @load_nxv32bf16(ptr %p) {
+; CHECK-LABEL: load_nxv32bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl8re16.v v8, (a0)
+; CHECK-NEXT:    ret
+  %x = load <vscale x 32 x bfloat>, ptr %p
+  ret <vscale x 32 x bfloat> %x
+}
+
+define <vscale x 4 x float> @extload(ptr %p) {
+; CHECK-LABEL: extload:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vl1re16.v v10, (a0)
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v8, v10
+; CHECK-NEXT:    ret
+  %x = load <vscale x 4 x bfloat>, ptr %p
+  %y = fpext <vscale x 4 x bfloat> %x to <vscale x 4 x float>
+  ret <vscale x 4 x float> %y
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll
new file mode 100644
index 00000000000000..30cbf9a3903190
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/store-bf16.ll
@@ -0,0 +1,71 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
+
+define void @store_nxv1bf16(<vscale x 1 x bfloat> %v, ptr %p) {
+; CHECK-LABEL: store_nxv1bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 1 x bfloat> %v, ptr %p
+  ret void
+}
+
+define void @store_nxv2bf16(<vscale x 2 x bfloat> %v, ptr %p) {
+; CHECK-LABEL: store_nxv2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 2 x bfloat> %v, ptr %p
+  ret void
+}
+
+define void @store_nxv4bf16(<vscale x 4 x bfloat> %v, ptr %p) {
+; CHECK-LABEL: store_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 4 x bfloat> %v, ptr %p
+  ret void
+}
+
+define void @store_nxv8bf16(<vscale x 8 x bfloat> %v, ptr %p) {
+; CHECK-LABEL: store_nxv8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs2r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 8 x bfloat> %v, ptr %p
+  ret void
+}
+
+define void @store_nxv16bf16(<vscale x 16 x bfloat> %v, ptr %p) {
+; CHECK-LABEL: store_nxv16bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs4r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 16 x bfloat> %v, ptr %p
+  ret void
+}
+
+define void @store_nxv32bf32(<vscale x 32 x bfloat> %v, ptr %p) {
+; CHECK-LABEL: store_nxv32bf32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vs8r.v v8, (a0)
+; CHECK-NEXT:    ret
+  store <vscale x 32 x bfloat> %v, ptr %p
+  ret void
+}
+
+define void @truncstore(<vscale x 4 x float> %v, ptr %p) {
+; CHECK-LABEL: truncstore:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a1, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfncvtbf16.f.f.w v10, v8
+; CHECK-NEXT:    vs1r.v v10, (a0)
+; CHECK-NEXT:    ret
+  %w = fptrunc <vscale x 4 x float> %v to <vscale x 4 x bfloat>
+  store <vscale x 4 x bfloat> %w, ptr %p
+  ret void
+}