[llvm] [RISCV] Custom legalize vXf16 BUILD_VECTOR without Zfhmin. (PR #97874)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 5 20:05:17 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
If we don't have Zfhmin, the scalar f16 operands are legalized with SoftPromoteHalf, which the generic type legalizer doesn't support for BUILD_VECTOR.
Instead, custom lower to a vXi16 BUILD_VECTOR by bitcasting each f16 operand to i16 and bitcasting the result back to vXf16.
Fixes #97849.
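For context, here is a reduced reproducer, taken verbatim from the buildvec_v2f16 test updated below; the llc invocation mirrors the new RV64ZVFHMIN RUN line, and the file name is hypothetical (the original reproducer attached to #97849 may differ):

```llvm
; repro.ll (hypothetical file name). Before this patch, the following
; command crashed during type legalization; with it, the build_vector is
; lowered through i16 bitcasts instead:
;   llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d repro.ll
define <2 x half> @buildvec_v2f16(half %a, half %b) {
  %v1 = insertelement <2 x half> poison, half %a, i64 0
  %v2 = insertelement <2 x half> %v1, half %b, i64 1
  ret <2 x half> %v2
}
```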
---
Full diff: https://github.com/llvm/llvm-project/pull/97874.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+20)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+169-37)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1e37f2c3b9c59..3b7b4fb10d923 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1329,6 +1329,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
// FIXME: We should prefer BUILD_VECTOR over SPLAT_VECTOR.
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
+ // We need to custom legalize f16 build vectors if Zfhmin isn't
+ // available.
+ if (!Subtarget.hasStdExtZfhminOrZhinxmin())
+ setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
// Don't promote f16 vector operations to f32 if f32 vector type is
// not legal.
@@ -3901,11 +3905,27 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
return SDValue();
}
+// Convert a vXf16 build_vector to vXi16 with bitcasts.
+static SDValue lowerBUILD_VECTORvXf16(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getSimpleValueType();
+ MVT IVT = VT.changeVectorElementType(MVT::i16);
+ SmallVector<SDValue, 16> NewOps(Op.getNumOperands());
+ for (unsigned I = 0, E = Op.getNumOperands(); I != E; ++I)
+ NewOps[I] = DAG.getBitcast(MVT::i16, Op.getOperand(I));
+ SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), IVT, NewOps);
+ return DAG.getBitcast(VT, Res);
+}
+
static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
MVT VT = Op.getSimpleValueType();
assert(VT.isFixedLengthVector() && "Unexpected vector!");
+ // If we don't have scalar f16 support, we need to bitcast to an i16 vector.
+ if (VT.getVectorElementType() == MVT::f16 &&
+ !Subtarget.hasStdExtZfhminOrZhinxmin())
+ return lowerBUILD_VECTORvXf16(Op, DAG);
+
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
return lowerBuildVectorOfConstants(Op, DAG, Subtarget);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index f7477da49a354..eb7f6b1bb6540 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -1,9 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64V
-; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RVA22U64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zfh,+zvfh,+zba,+zbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFH
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RV64V
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+rva22u64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFH,RVA22U64
+; RUN: llc -mtriple=riscv32 -target-abi=ilp32d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32ZVFHMIN
+; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zvfhmin,+f,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64ZVFHMIN
; Tests that a floating-point build_vector doesn't try and generate a VID
; instruction
@@ -249,6 +251,20 @@ define dso_local void @splat_load_licm(ptr %0) {
; RVA22U64-NEXT: bne a0, a1, .LBB12_1
; RVA22U64-NEXT: # %bb.2:
; RVA22U64-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: splat_load_licm:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: lui a1, 1
+; RV64ZVFHMIN-NEXT: add a1, a0, a1
+; RV64ZVFHMIN-NEXT: lui a2, 263168
+; RV64ZVFHMIN-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64ZVFHMIN-NEXT: vmv.v.x v8, a2
+; RV64ZVFHMIN-NEXT: .LBB12_1: # =>This Inner Loop Header: Depth=1
+; RV64ZVFHMIN-NEXT: vse32.v v8, (a0)
+; RV64ZVFHMIN-NEXT: addi a0, a0, 16
+; RV64ZVFHMIN-NEXT: bne a0, a1, .LBB12_1
+; RV64ZVFHMIN-NEXT: # %bb.2:
+; RV64ZVFHMIN-NEXT: ret
br label %2
2: ; preds = %2, %1
@@ -265,12 +281,37 @@ define dso_local void @splat_load_licm(ptr %0) {
}
define <2 x half> @buildvec_v2f16(half %a, half %b) {
-; CHECK-LABEL: buildvec_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.v.f v8, fa0
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
-; CHECK-NEXT: ret
+; RV32ZVFH-LABEL: buildvec_v2f16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFH-NEXT: vfmv.v.f v8, fa0
+; RV32ZVFH-NEXT: vfslide1down.vf v8, v8, fa1
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: buildvec_v2f16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFH-NEXT: vfmv.v.f v8, fa0
+; RV64ZVFH-NEXT: vfslide1down.vf v8, v8, fa1
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: buildvec_v2f16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: fmv.x.w a0, fa1
+; RV32ZVFHMIN-NEXT: fmv.x.w a1, fa0
+; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFHMIN-NEXT: vmv.v.x v8, a1
+; RV32ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: buildvec_v2f16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: fmv.x.w a0, fa1
+; RV64ZVFHMIN-NEXT: fmv.x.w a1, fa0
+; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFHMIN-NEXT: vmv.v.x v8, a1
+; RV64ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0
+; RV64ZVFHMIN-NEXT: ret
%v1 = insertelement <2 x half> poison, half %a, i64 0
%v2 = insertelement <2 x half> %v1, half %b, i64 1
ret <2 x half> %v2
@@ -1297,45 +1338,136 @@ entry:
}
define <2 x half> @vid_v2f16() {
-; CHECK-LABEL: vid_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: vfcvt.f.x.v v8, v8
-; CHECK-NEXT: ret
+; RV32ZVFH-LABEL: vid_v2f16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFH-NEXT: vid.v v8
+; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vid_v2f16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFH-NEXT: vid.v v8
+; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vid_v2f16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: lui a0, 245760
+; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vid_v2f16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: lui a0, 245760
+; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVFHMIN-NEXT: ret
ret <2 x half> <half 0.0, half 1.0>
}
define <2 x half> @vid_addend1_v2f16() {
-; CHECK-LABEL: vid_addend1_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: vadd.vi v8, v8, 1
-; CHECK-NEXT: vfcvt.f.x.v v8, v8
-; CHECK-NEXT: ret
+; RV32ZVFH-LABEL: vid_addend1_v2f16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFH-NEXT: vid.v v8
+; RV32ZVFH-NEXT: vadd.vi v8, v8, 1
+; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vid_addend1_v2f16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFH-NEXT: vid.v v8
+; RV64ZVFH-NEXT: vadd.vi v8, v8, 1
+; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vid_addend1_v2f16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: lui a0, 262148
+; RV32ZVFHMIN-NEXT: addi a0, a0, -1024
+; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vid_addend1_v2f16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: lui a0, 262148
+; RV64ZVFHMIN-NEXT: addi a0, a0, -1024
+; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVFHMIN-NEXT: ret
ret <2 x half> <half 1.0, half 2.0>
}
define <2 x half> @vid_denominator2_v2f16() {
-; CHECK-LABEL: vid_denominator2_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI28_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0)
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32ZVFH-LABEL: vid_denominator2_v2f16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: lui a0, %hi(.LCPI28_0)
+; RV32ZVFH-NEXT: addi a0, a0, %lo(.LCPI28_0)
+; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFH-NEXT: vle16.v v8, (a0)
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vid_denominator2_v2f16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: lui a0, %hi(.LCPI28_0)
+; RV64ZVFH-NEXT: addi a0, a0, %lo(.LCPI28_0)
+; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFH-NEXT: vle16.v v8, (a0)
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vid_denominator2_v2f16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: lui a0, 245764
+; RV32ZVFHMIN-NEXT: addi a0, a0, -2048
+; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV32ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vid_denominator2_v2f16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: lui a0, 245764
+; RV64ZVFHMIN-NEXT: addi a0, a0, -2048
+; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e32, m1, ta, ma
+; RV64ZVFHMIN-NEXT: vmv.s.x v8, a0
+; RV64ZVFHMIN-NEXT: ret
ret <2 x half> <half 0.5, half 1.0>
}
define <2 x half> @vid_step2_v2f16() {
-; CHECK-LABEL: vid_step2_v2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: vadd.vv v8, v8, v8
-; CHECK-NEXT: vfcvt.f.x.v v8, v8
-; CHECK-NEXT: ret
+; RV32ZVFH-LABEL: vid_step2_v2f16:
+; RV32ZVFH: # %bb.0:
+; RV32ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFH-NEXT: vid.v v8
+; RV32ZVFH-NEXT: vadd.vv v8, v8, v8
+; RV32ZVFH-NEXT: vfcvt.f.x.v v8, v8
+; RV32ZVFH-NEXT: ret
+;
+; RV64ZVFH-LABEL: vid_step2_v2f16:
+; RV64ZVFH: # %bb.0:
+; RV64ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFH-NEXT: vid.v v8
+; RV64ZVFH-NEXT: vadd.vv v8, v8, v8
+; RV64ZVFH-NEXT: vfcvt.f.x.v v8, v8
+; RV64ZVFH-NEXT: ret
+;
+; RV32ZVFHMIN-LABEL: vid_step2_v2f16:
+; RV32ZVFHMIN: # %bb.0:
+; RV32ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV32ZVFHMIN-NEXT: vid.v v8
+; RV32ZVFHMIN-NEXT: vsll.vi v8, v8, 14
+; RV32ZVFHMIN-NEXT: ret
+;
+; RV64ZVFHMIN-LABEL: vid_step2_v2f16:
+; RV64ZVFHMIN: # %bb.0:
+; RV64ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; RV64ZVFHMIN-NEXT: vid.v v8
+; RV64ZVFHMIN-NEXT: vsll.vi v8, v8, 14
+; RV64ZVFHMIN-NEXT: ret
ret <2 x half> <half 0.0, half 2.0>
}
``````````
https://github.com/llvm/llvm-project/pull/97874