[llvm] a8f8089 - [RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 (#114925)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 21:46:38 PST 2024
Author: Luke Lau
Date: 2024-11-05T13:46:35+08:00
New Revision: a8f80897ba4e41fa2ead50f9f6d652b80b4c51fb
URL: https://github.com/llvm/llvm-project/commit/a8f80897ba4e41fa2ead50f9f6d652b80b4c51fb
DIFF: https://github.com/llvm/llvm-project/commit/a8f80897ba4e41fa2ead50f9f6d652b80b4c51fb.diff
LOG: [RISCV] Handle zvfhmin/zvfbfmin in lowerVECTOR_SHUFFLEAsVSlide1 (#114925)
Most of lowerVECTOR_SHUFFLE already lowers to nodes that work on f16 and bf16
vectors; the exception is the vslide1 lowering, which tries to emit
vfslide1{down,up}.vf. Handle this case as an integer vslide1 instead, moving the
scalar over with fmv.x.h.
Fixes #114893
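As a rough sketch of what this enables (mirroring the vslide1down_2xbf16 test
added below), a bf16 slide1down shuffle such as

  %vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
  %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 1, i32 2>

now lowers to an integer slide with the scalar transferred via fmv.x.h:

  fmv.x.h a0, fa0
  vsetivli zero, 2, e16, mf4, ta, ma
  vslide1down.vx v8, v8, a0

rather than attempting a vfslide1down.vf, which zvfhmin/zvfbfmin do not provide.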
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d2d03d4572dac8..96490cdec6c69d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4817,6 +4817,24 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+
+ // zvfhmin and zvfbfmin don't have vfslide1{down,up}.vf so use fmv.x.h +
+ // vslide1{down,up}.vx instead.
+ if (VT.getVectorElementType() == MVT::bf16 ||
+ (VT.getVectorElementType() == MVT::f16 &&
+ !Subtarget.hasVInstructionsF16())) {
+ MVT IntVT = ContainerVT.changeVectorElementTypeToInteger();
+ Splat =
+ DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, Subtarget.getXLenVT(), Splat);
+ V2 = DAG.getBitcast(
+ IntVT, convertToScalableVector(ContainerVT, V2, DAG, Subtarget));
+ SDValue Vec = DAG.getNode(
+ IsVSlidedown ? RISCVISD::VSLIDE1DOWN_VL : RISCVISD::VSLIDE1UP_VL, DL,
+ IntVT, DAG.getUNDEF(IntVT), V2, Splat, TrueMask, VL);
+ Vec = DAG.getBitcast(ContainerVT, Vec);
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+ }
+
auto OpCode = IsVSlidedown ?
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
(VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
index f531ff3a835e45..563b90dfa47efd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1down.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -131,23 +133,61 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) {
ret <4 x i64> %v1
}
-define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
-; CHECK-LABEL: vslide1down_2xf16:
+define <2 x bfloat> @vslide1down_2xbf16(<2 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1down_2xbf16:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: ret
+ %vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 1, i32 2>
+ ret <2 x bfloat> %v1
+}
+
+define <4 x bfloat> @vslide1down_4xbf16(<4 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1down_4xbf16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: ret
+ %vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x bfloat> %v1
+}
+
+define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
+; ZVFH-LABEL: vslide1down_2xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1down_2xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
%vb = insertelement <2 x half> poison, half %b, i64 0
%v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 1, i32 2>
ret <2 x half> %v1
}
define <4 x half> @vslide1down_4xf16(<4 x half> %v, half %b) {
-; CHECK-LABEL: vslide1down_4xf16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
-; CHECK-NEXT: ret
+; ZVFH-LABEL: vslide1down_4xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfslide1down.vf v8, v8, fa0
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1down_4xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; ZVFHMIN-NEXT: ret
%vb = insertelement <4 x half> poison, half %b, i64 0
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x half> %v1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
index b3390b6eeeccdb..0f6d68dc1a6c76 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-vslide1up.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zvfh,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfhmin,+zvfhmin,+zfbfmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN,RV64
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -140,25 +142,67 @@ define <4 x i64> @vslide1up_4xi64(<4 x i64> %v, i64 %b) {
ret <4 x i64> %v1
}
-define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) {
-; CHECK-LABEL: vslide1up_2xf16:
+define <2 x bfloat> @vslide1up_2xbf16(<2 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1up_2xbf16:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
- %vb = insertelement <2 x half> poison, half %b, i64 0
- %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0>
- ret <2 x half> %v1
+ %vb = insertelement <2 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <2 x bfloat> %v, <2 x bfloat> %vb, <2 x i32> <i32 2, i32 0>
+ ret <2 x bfloat> %v1
}
-define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) {
-; CHECK-LABEL: vslide1up_4xf16:
+define <4 x bfloat> @vslide1up_4xbf16(<4 x bfloat> %v, bfloat %b) {
+; CHECK-LABEL: vslide1up_4xbf16:
; CHECK: # %bb.0:
+; CHECK-NEXT: fmv.x.h a0, fa0
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfslide1up.vf v9, v8, fa0
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
+ %vb = insertelement <4 x bfloat> poison, bfloat %b, i64 0
+ %v1 = shufflevector <4 x bfloat> %v, <4 x bfloat> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
+ ret <4 x bfloat> %v1
+}
+
+define <2 x half> @vslide1up_2xf16(<2 x half> %v, half %b) {
+; ZVFH-LABEL: vslide1up_2xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1up_2xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0
+; ZVFHMIN-NEXT: vmv1r.v v8, v9
+; ZVFHMIN-NEXT: ret
+ %vb = insertelement <2 x half> poison, half %b, i64 0
+ %v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 2, i32 0>
+ ret <2 x half> %v1
+}
+
+define <4 x half> @vslide1up_4xf16(<4 x half> %v, half %b) {
+; ZVFH-LABEL: vslide1up_4xf16:
+; ZVFH: # %bb.0:
+; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFH-NEXT: vfslide1up.vf v9, v8, fa0
+; ZVFH-NEXT: vmv1r.v v8, v9
+; ZVFH-NEXT: ret
+;
+; ZVFHMIN-LABEL: vslide1up_4xf16:
+; ZVFHMIN: # %bb.0:
+; ZVFHMIN-NEXT: fmv.x.h a0, fa0
+; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; ZVFHMIN-NEXT: vslide1up.vx v9, v8, a0
+; ZVFHMIN-NEXT: vmv1r.v v8, v9
+; ZVFHMIN-NEXT: ret
%vb = insertelement <4 x half> poison, half %b, i64 0
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x half> %v1