[llvm] [LegalizeDAG][RISCV] Don't promote f16 vector ISD::FNEG/FABS/FCOPYSIGN to f32 when we don't have Zvfh. (PR #106652)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 18:43:14 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-selectiondag
@llvm/pr-subscribers-backend-risc-v
Author: Craig Topper (topperc)
Changes:
The fp_extend will canonicalize NaNs, which is not the semantics of FNEG/FABS/FCOPYSIGN.
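To make the hazard concrete, here is a minimal standalone sketch (mine, not code from this patch) of why the promoted route is wrong: FNEG on binary16 must only flip bit 15 and leave every other bit, including a NaN payload, untouched, whereas fp_extend quiets a signaling NaN by setting the quiet bit.

```cpp
// Hypothetical illustration, not code from this patch.
#include <cassert>
#include <cstdint>

int main() {
  const uint16_t SNaN = 0x7D00;    // binary16 sNaN: exponent all ones, quiet bit (0x0200) clear
  const uint16_t SignBit = 0x8000; // bit 15 of a binary16

  // Correct FNEG semantics: a pure sign-bit flip; the NaN payload survives.
  assert((SNaN ^ SignBit) == 0xFD00);

  // The promoted route (fp_extend -> f32 fneg -> fp_round) would additionally
  // set the quiet bit, producing 0xFF00 instead of 0xFD00.
  return 0;
}
```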
For fixed vectors, I'm scalarizing due to test changes on other targets where scalarization is expected. I will try to address this in a follow-up.
For scalable vectors, we bitcast to integer and use integer logic ops.
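For reference, a small self-contained sketch (again mine, assuming IEEE-754 binary16 layout) of the per-element bit logic that the scalable-vector expansion performs with BITCAST plus AND/OR/XOR; the mask constants correspond to APInt::getSignMask(16) and APInt::getSignedMaxValue(16) in the patch:

```cpp
// Hypothetical per-element model of the integer expansion; the patch applies
// the same logic to whole scalable vectors as SelectionDAG nodes.
#include <cassert>
#include <cstdint>

constexpr uint16_t SignMask = 0x8000;      // APInt::getSignMask(16)
constexpr uint16_t ClearSignMask = 0x7FFF; // APInt::getSignedMaxValue(16)

constexpr uint16_t fnegBits(uint16_t X) { return X ^ SignMask; }
constexpr uint16_t fabsBits(uint16_t X) { return X & ClearSignMask; }
constexpr uint16_t copysignBits(uint16_t Mag, uint16_t Sign) {
  // The two OR operands share no set bits, which is why the patch marks the
  // ISD::OR with the Disjoint flag.
  return (Mag & ClearSignMask) | (Sign & SignMask);
}

int main() {
  assert(fnegBits(0x3C00) == 0xBC00);             // -(1.0) == -1.0
  assert(fabsBits(0xBC00) == 0x3C00);             // |-1.0| == 1.0
  assert(copysignBits(0x3C00, 0x8000) == 0xBC00); // copysign(1.0, -0.0) == -1.0
  return 0;
}
```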
---
Patch is 396.63 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/106652.diff
9 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (+57-1)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+8-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll (+5062-694)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfabs-sdnode.ll (+24-41)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll (+252-395)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll (+127-148)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfneg-sdnode.ll (+18-41)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll (+177-247)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll (+148-193)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 2557fa288606e7..b551462831acef 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -139,6 +139,8 @@ class VectorLegalizer {
std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
SDValue ExpandStore(SDNode *N);
SDValue ExpandFNEG(SDNode *Node);
+ SDValue ExpandFABS(SDNode *Node);
+ SDValue ExpandFCOPYSIGN(SDNode *Node);
void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandBITREVERSE(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -913,6 +915,12 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
case ISD::FNEG:
Results.push_back(ExpandFNEG(Node));
return;
+ case ISD::FABS:
+ Results.push_back(ExpandFABS(Node));
+ return;
+ case ISD::FCOPYSIGN:
+ Results.push_back(ExpandFCOPYSIGN(Node));
+ return;
case ISD::FSUB:
ExpandFSUB(Node, Results);
return;
@@ -1674,7 +1682,7 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
// FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
- TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) {
+ (TLI.isOperationLegalOrCustom(ISD::FSUB, VT) || VT.isScalableVector())) {
SDLoc DL(Node);
SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
SDValue SignMask = DAG.getConstant(
@@ -1685,6 +1693,54 @@ SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
return DAG.UnrollVectorOp(Node);
}
+SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: We shouldn't restrict this to scalable vectors.
+ if (TLI.isOperationLegalOrCustom(ISD::XOR, IntVT) &&
+ VT.isScalableVector()) {
+ SDLoc DL(Node);
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
+ return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
+ }
+ return DAG.UnrollVectorOp(Node);
+}
+
+SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
+ EVT VT = Node->getValueType(0);
+ EVT IntVT = VT.changeVectorElementTypeToInteger();
+
+ // FIXME: We shouldn't restrict this to scalable vectors.
+ if (VT == Node->getOperand(1).getValueType() &&
+ TLI.isOperationLegalOrCustom(ISD::AND, IntVT) &&
+ TLI.isOperationLegalOrCustom(ISD::OR, IntVT) &&
+ VT.isScalableVector()) {
+ SDLoc DL(Node);
+ SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
+ SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
+
+ SDValue SignMask = DAG.getConstant(
+ APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
+
+ SDValue ClearSignMask = DAG.getConstant(
+ APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
+ SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
+
+ SDNodeFlags Flags;
+ Flags.setDisjoint(true);
+
+ SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, Flags);
+
+ return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
+ }
+ return DAG.UnrollVectorOp(Node);
+}
+
void VectorLegalizer::ExpandFSUB(SDNode *Node,
SmallVectorImpl<SDValue> &Results) {
// For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 09928dcc1f489a..cddd65f58baba8 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -884,7 +884,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
static const unsigned ZvfhminPromoteOps[] = {
ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB,
ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT,
- ISD::FABS, ISD::FNEG, ISD::FCOPYSIGN, ISD::FCEIL,
+ ISD::FCEIL,
ISD::FFLOOR, ISD::FROUND, ISD::FROUNDEVEN, ISD::FRINT,
ISD::FNEARBYINT, ISD::IS_FPCLASS, ISD::SETCC, ISD::FMAXIMUM,
ISD::FMINIMUM, ISD::STRICT_FADD, ISD::STRICT_FSUB, ISD::STRICT_FMUL,
@@ -1016,6 +1016,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// load/store
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
+
// Custom split nxv32f16 since nxv32f32 if not legal.
if (VT == MVT::nxv32f16) {
setOperationAction(ZvfhminPromoteOps, VT, Custom);
@@ -1271,6 +1275,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// available.
setOperationAction(ISD::BUILD_VECTOR, MVT::f16, Custom);
}
+ setOperationAction(ISD::FNEG, VT, Expand);
+ setOperationAction(ISD::FABS, VT, Expand);
+ setOperationAction(ISD::FCOPYSIGN, VT, Expand);
MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
// Don't promote f16 vector operations to f32 if f32 vector type is
// not legal.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index fb9c0a57fd1bee..9ec4ed90720b95 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -514,176 +514,83 @@ define void @fneg_v8f16(ptr %x) {
; ZVFH-NEXT: vse16.v v8, (a0)
; ZVFH-NEXT: ret
;
-; ZVFHMIN-LABEL: fneg_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfneg.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <8 x half>, ptr %x
- %b = fneg <8 x half> %a
- store <8 x half> %b, ptr %x
- ret void
-}
-
-define void @fneg_v6f16(ptr %x) {
-; ZVFH-LABEL: fneg_v6f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-RV32-LABEL: fneg_v6f16:
-; ZVFHMIN-RV32: # %bb.0:
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV32-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-RV32-NEXT: ret
-;
-; ZVFHMIN-RV64-LABEL: fneg_v6f16:
-; ZVFHMIN-RV64: # %bb.0:
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-RV64-NEXT: vfneg.v v8, v9
-; ZVFHMIN-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-RV64-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-RV64-NEXT: ret
- %a = load <6 x half>, ptr %x
- %b = fneg <6 x half> %a
- store <6 x half> %b, ptr %x
- ret void
-}
-
-define void @fneg_v4f32(ptr %x) {
-; ZVFH-LABEL: fneg_v4f32:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; ZVFH-NEXT: vle32.v v8, (a0)
-; ZVFH-NEXT: vfneg.v v8, v8
-; ZVFH-NEXT: vse32.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: fneg_v4f32:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-NEXT: vle32.v v8, (a0)
-; ZVFHMIN-NEXT: vfneg.v v8, v8
-; ZVFHMIN-NEXT: vse32.v v8, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <4 x float>, ptr %x
- %b = fneg <4 x float> %a
- store <4 x float> %b, ptr %x
- ret void
-}
-
-define void @fneg_v2f64(ptr %x) {
-; CHECK-LABEL: fneg_v2f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vfneg.v v8, v8
-; CHECK-NEXT: vse64.v v8, (a0)
-; CHECK-NEXT: ret
- %a = load <2 x double>, ptr %x
- %b = fneg <2 x double> %a
- store <2 x double> %b, ptr %x
- ret void
-}
-
-define void @fabs_v8f16(ptr %x) {
-; ZVFH-LABEL: fabs_v8f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-LABEL: fabs_v8f16:
-; ZVFHMIN: # %bb.0:
-; ZVFHMIN-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-NEXT: vfabs.v v8, v9
-; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-NEXT: ret
- %a = load <8 x half>, ptr %x
- %b = call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
- store <8 x half> %b, ptr %x
- ret void
-}
-declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
-
-define void @fabs_v6f16(ptr %x) {
-; ZVFH-LABEL: fabs_v6f16:
-; ZVFH: # %bb.0:
-; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; ZVFH-NEXT: vle16.v v8, (a0)
-; ZVFH-NEXT: vfabs.v v8, v8
-; ZVFH-NEXT: vse16.v v8, (a0)
-; ZVFH-NEXT: ret
-;
-; ZVFHMIN-ZFH-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV32-LABEL: fneg_v8f16:
; ZVFHMIN-ZFH-RV32: # %bb.0:
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV32-NEXT: .cfi_def_cfa_offset 32
; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV32-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vfabs.v v8, v9
-; ZVFHMIN-ZFH-RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-ZFH-RV32-NEXT: addi a1, a0, 8
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV32-NEXT: vse32.v v8, (a1)
-; ZVFHMIN-ZFH-RV32-NEXT: vsetivli zero, 4, e16, mf4, ta, ma
-; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v9, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV32-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV32-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV32-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV32-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV32-NEXT: ret
;
-; ZVFHMIN-ZFH-RV64-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFH-RV64-LABEL: fneg_v8f16:
; ZVFHMIN-ZFH-RV64: # %bb.0:
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, -32
+; ZVFHMIN-ZFH-RV64-NEXT: .cfi_def_cfa_offset 32
; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 8, e16, mf2, ta, ma
; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: vfwcvt.f.f.v v9, v8
-; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vfabs.v v8, v9
-; ZVFHMIN-ZFH-RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vfncvt.f.f.w v9, v8
-; ZVFHMIN-ZFH-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFH-RV64-NEXT: vse64.v v9, (a0)
-; ZVFHMIN-ZFH-RV64-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFH-RV64-NEXT: vslidedown.vi v8, v9, 2
-; ZVFHMIN-ZFH-RV64-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: mv a1, sp
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 30(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 10(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 26(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 8(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 22(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 4(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 2(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 18(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: flh fa5, 0(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: fneg.h fa5, fa5
+; ZVFHMIN-ZFH-RV64-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFH-RV64-NEXT: addi a1, sp, 16
+; ZVFHMIN-ZFH-RV64-NEXT: vle16.v v8, (a1)
+; ZVFHMIN-ZFH-RV64-NEXT: vse16.v v8, (a0)
+; ZVFHMIN-ZFH-RV64-NEXT: addi sp, sp, 32
; ZVFHMIN-ZFH-RV64-NEXT: ret
;
-; ZVFHMIN-ZFHIN-RV32-LABEL: fabs_v6f16:
+; ZVFHMIN-ZFHIN-RV32-LABEL: fneg_v8f16:
; ZVFHMIN-ZFHIN-RV32: # %bb.0:
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, -64
; ZVFHMIN-ZFHIN-RV32-NEXT: .cfi_def_cfa_offset 64
@@ -691,246 +598,1645 @@ define void @fabs_v6f16(ptr %x) {
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: mv a1, sp
; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a1)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 14(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 12(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 10(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 8(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 6(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 4(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 24(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 36(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 2(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 40(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 0(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
-; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 44(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: andi a1, a1, 127
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 17(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 21(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 25(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 29(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 33(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 37(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 41(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: lbu a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: xori a1, a1, 128
+; ZVFHMIN-ZFHIN-RV32-NEXT: sb a1, 45(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 16(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 62(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 20(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 60(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 24(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 58(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 28(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 56(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 32(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 54(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa4, 24(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 52(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa3, 20(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 50(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa2, 16(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 48(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 36(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 52(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 40(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 50(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: flh fa5, 44(sp)
+; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 48(sp)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 48
; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v8, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa5, 46(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa4, 44(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa3, 42(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: fsh fa2, 40(sp)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a1, sp, 40
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 4, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vle16.v v9, (a1)
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v9, (a0)
-; ZVFHMIN-ZFHIN-RV32-NEXT: addi a0, a0, 8
-; ZVFHMIN-ZFHIN-RV32-NEXT: vslidedown.vi v8, v8, 2
-; ZVFHMIN-ZFHIN-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; ZVFHMIN-ZFHIN-RV32-NEXT: vse32.v v8, (a0)
+; ZVFHMIN-ZFHIN-RV32-NEXT: vse16.v v8, (a0)
; ZVFHMIN-ZFHIN-RV32-NEXT: addi sp, sp, 64
; ZVFHMIN-ZFHIN-RV32-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/106652