[llvm] 9357712 - [RISCV] Use vfslide1down for build_vectors of non-constant floats
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed May 24 10:48:35 PDT 2023
Author: Philip Reames
Date: 2023-05-24T10:48:26-07:00
New Revision: 9357712b50583b10e315ca59293803f9f012dda5
URL: https://github.com/llvm/llvm-project/commit/9357712b50583b10e315ca59293803f9f012dda5
DIFF: https://github.com/llvm/llvm-project/commit/9357712b50583b10e315ca59293803f9f012dda5.diff
LOG: [RISCV] Use vfslide1down for build_vectors of non-constant floats
This adds the vfslide1down (and vfslide1up for consistency) nodes. These mostly parallel the existing vslide1down/up nodes. (See note below on instruction semantics.) We then use the vfslide1down in build_vector lowering instead of going through the stack.
The specification is more than a bit vague on the meaning of these instructions. All we're given is "The vfslide1down instruction is defined analogously, but sources its scalar argument from an f register."
We have to combine this with a general note at the beginning of Section 10 (Vector Arithmetic Instruction Formats), which reads: "For floating-point operations, the scalar can be taken from a scalar f register. If FLEN > SEW, the value in the f register is checked for a valid NaN-boxed value, in which case the least-significant SEW bits of the f register are used, else the canonical NaN value is used. Vector instructions where any floating-point vector operand’s EEW is not a supported floating-point type width (which includes when FLEN < SEW) are reserved."
Note that floats are NaN-boxed when D is implemented.
Combining all of that, we're fine as long as the scalar type matches the vector element type - which it does by construction. We shouldn't have legal vectors which hit the reserved encoding case; an assert is included just to be careful.
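As a concrete illustration of the NaN-boxing rule quoted above, here is a minimal sketch (not code from this patch) of how the scalar operand is taken from the f register, assuming FLEN=64 and SEW=32; the function name and constants are purely illustrative:

  #include <cstdint>

  // Hypothetical model of the NaN-box check for FLEN=64, SEW=32: if the
  // upper FLEN-SEW bits of the f register are not all ones, the canonical
  // NaN is used; otherwise the least-significant SEW bits are used.
  uint32_t scalarOperandFromFReg(uint64_t FRegBits) {
    const uint64_t BoxMask = 0xFFFFFFFF00000000ull; // upper FLEN-SEW bits
    if ((FRegBits & BoxMask) != BoxMask)
      return 0x7FC00000u; // canonical single-precision NaN
    return static_cast<uint32_t>(FRegBits);
  }

Because narrower floats are NaN-boxed per the ABI and the scalar type matches the element type here, the least-significant SEW bits are what get used in practice; the assert in the lowering only guards the reserved-encoding (FLEN < SEW) case.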
Differential Revision: https://reviews.llvm.org/D151347
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.h
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a7af257e08ad..8cfd48a1ef60 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3218,15 +3218,13 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
// For constant vectors, use generic constant pool lowering. Otherwise,
// we'd have to materialize constants in GPRs just to move them into the
// vector.
- if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
+ if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
return SDValue();
- // We can use a series of vslide1down instructions to move values in GPRs
- // into the appropriate place in the result vector. We use slide1down
- // to avoid the register group overlap constraint of vslide1up.
- if (VT.isFloatingPoint())
- // TODO: Use vfslide1down.
- return SDValue();
+ assert((!VT.isFloatingPoint() ||
+ VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
+ "Illegal type which will result in reserved encoding");
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
@@ -3243,8 +3241,10 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
Vec, Offset, Mask, VL, Policy);
UndefCount = 0;
}
- Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL);
+ auto OpCode =
+ VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+ Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ V, Mask, VL);
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
@@ -15161,6 +15161,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(VSLIDE1UP_VL)
NODE_NAME_CASE(VSLIDEDOWN_VL)
NODE_NAME_CASE(VSLIDE1DOWN_VL)
+ NODE_NAME_CASE(VFSLIDE1UP_VL)
+ NODE_NAME_CASE(VFSLIDE1DOWN_VL)
NODE_NAME_CASE(VID_VL)
NODE_NAME_CASE(VFNCVT_ROD_VL)
NODE_NAME_CASE(VECREDUCE_ADD_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index a8c1917c2b52..5ddf94f5da0e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -164,6 +164,12 @@ enum NodeType : unsigned {
// value. The fourth and fifth operands are the mask and VL operands.
VSLIDE1UP_VL,
VSLIDE1DOWN_VL,
+ // Matches the semantics of vfslide1up/vfslide1down. The first operand is
+ // passthru operand, the second is source vector, third is a scalar value
+ // whose type matches the element type of the vectors. The fourth and fifth
+ // operands are the mask and VL operands.
+ VFSLIDE1UP_VL,
+ VFSLIDE1DOWN_VL,
// Matches the semantics of the vid.v instruction, with a mask and VL
// operand.
VID_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index c34ebf6bc8e3..74a6f8b2776f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2456,11 +2456,18 @@ def SDTRVVSlide1 : SDTypeProfile<1, 5, [
SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
SDTCisVT<5, XLenVT>
]>;
+def SDTRVVFSlide1 : SDTypeProfile<1, 5, [
+ SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisFP<0>,
+ SDTCisEltOfVec<3, 0>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
+ SDTCisVT<5, XLenVT>
+]>;
def riscv_slideup_vl : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
def riscv_slide1up_vl : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
def riscv_slidedown_vl : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
+def riscv_fslide1up_vl : SDNode<"RISCVISD::VFSLIDE1UP_VL", SDTRVVFSlide1, []>;
+def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, []>;
foreach vti = AllIntegerVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
@@ -2495,6 +2502,35 @@ foreach vti = AllIntegerVectors in {
}
}
+foreach vti = AllFloatVectors in {
+ let Predicates = GetVTypePredicates<vti>.Predicates in {
+ def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector undef),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+ vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+ def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
+ (vti.Vector vti.RegClass:$rs1),
+ vti.Scalar:$rs2, (vti.Mask true_mask),
+ VLOpFrag)),
+ (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
+ vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+ }
+}
+
foreach vti = AllVectors in {
let Predicates = GetVTypePredicates<vti>.Predicates in {
def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index d4de78ba1a46..30ebcf7651f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -291,14 +291,9 @@ define dso_local void @splat_load_licm(float* %0) {
define <2 x half> @buildvec_v2f16(half %a, half %b) {
; CHECK-LABEL: buildvec_v2f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: fsh fa1, 14(sp)
-; CHECK-NEXT: fsh fa0, 12(sp)
-; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x half> poison, half %a, i64 0
%v2 = insertelement <2 x half> %v1, half %b, i64 1
@@ -308,14 +303,9 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
define <2 x float> @buildvec_v2f32(float %a, float %b) {
; CHECK-LABEL: buildvec_v2f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: fsw fa1, 12(sp)
-; CHECK-NEXT: fsw fa0, 8(sp)
-; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x float> poison, float %a, i64 0
%v2 = insertelement <2 x float> %v1, float %b, i64 1
@@ -325,14 +315,9 @@ define <2 x float> @buildvec_v2f32(float %a, float %b) {
define <2 x double> @buildvec_v2f64(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: fsd fa1, 8(sp)
-; CHECK-NEXT: fsd fa0, 0(sp)
-; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x double> poison, double %a, i64 0
%v2 = insertelement <2 x double> %v1, double %b, i64 1
@@ -342,14 +327,9 @@ define <2 x double> @buildvec_v2f64(double %a, double %b) {
define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64_b:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: fsd fa1, 8(sp)
-; CHECK-NEXT: fsd fa0, 0(sp)
-; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x double> poison, double %b, i64 1
%v2 = insertelement <2 x double> %v1, double %a, i64 0
@@ -359,16 +339,11 @@ define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: buildvec_v4f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: fsw fa3, 12(sp)
-; CHECK-NEXT: fsw fa2, 8(sp)
-; CHECK-NEXT: fsw fa1, 4(sp)
-; CHECK-NEXT: fsw fa0, 0(sp)
-; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
; CHECK-NEXT: ret
%v1 = insertelement <4 x float> poison, float %a, i64 0
%v2 = insertelement <4 x float> %v1, float %b, i64 1