[llvm] [AArch64] Avoid using NEON FCVTXN in Streaming-SVE mode. (PR #91981)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Mon May 13 08:10:50 PDT 2024
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/91981
We can still lower these operations using (streaming-compatible) SVE
instructions when compiling for SME or SVE2.
>From 189aad6e57aa35112994759ec494b54838f046c5 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 26 Apr 2024 13:57:41 +0100
Subject: [PATCH] [AArch64] Avoid using NEON FCVTXN in Streaming-SVE mode.
We can still lower these operations using (streaming-compatible) SVE
instructions when compiling for SME or SVE2.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 28 ++++++++++++---
...e-streaming-mode-fixed-length-fcopysign.ll | 36 +++++++++++--------
2 files changed, 45 insertions(+), 19 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7344387ffe552..522f2dc95f87b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19507,7 +19507,27 @@ static SDValue performBuildVectorCombine(SDNode *N,
SDLoc DL(N);
EVT VT = N->getValueType(0);
- if (VT == MVT::v4f16 || VT == MVT::v4bf16) {
+ const auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ bool CanUseFCVTXN = Subtarget.isNeonAvailable() ||
+ (Subtarget.useSVEForFixedLengthVectors() &&
+ (Subtarget.hasSVE2() || Subtarget.hasSME()));
+ if (CanUseFCVTXN && (VT == MVT::v4f16 || VT == MVT::v4bf16)) {
+ // Convenience function to build an FCVT instruction, which is needed
+ // once for the bottom bits and once for the top bits.
+ auto MakeFCVTXN = [&](SDValue V) {
+ if (Subtarget.isNeonAvailable())
+ return DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, V);
+ else {
+ SDValue In = convertToScalableVector(DAG, MVT::nxv2f64, V);
+ SDValue PTrue = getPredicateForVector(DAG, DL, MVT::v2f64);
+ SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_fcvtx_f32f64,
+ DL, MVT::i64);
+ SDValue Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv4f32,
+ {ID, In, PTrue, In});
+ return convertFromScalableVector(DAG, MVT::v2f32, Op);
+ }
+ };
+
SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
if (Elt0->getOpcode() == ISD::FP_ROUND &&
@@ -19548,12 +19568,10 @@ static SDValue performBuildVectorCombine(SDNode *N,
Elt2->getOperand(0)->getConstantOperandVal(1) == 0 &&
Elt3->getOperand(0)->getConstantOperandVal(1) == 1) {
SDValue HighLanesSrcVec = Elt2->getOperand(0)->getOperand(0);
- HighLanes =
- DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, HighLanesSrcVec);
+ HighLanes = MakeFCVTXN(HighLanesSrcVec);
}
if (HighLanes) {
- SDValue DoubleToSingleSticky =
- DAG.getNode(AArch64ISD::FCVTXN, DL, MVT::v2f32, LowLanesSrcVec);
+ SDValue DoubleToSingleSticky = MakeFCVTXN(LowLanesSrcVec);
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
DoubleToSingleSticky, HighLanes);
return DAG.getNode(ISD::FP_ROUND, DL, VT, Concat,
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 0d6675def8b52..196105d3f26d0 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -427,28 +427,36 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f64:
; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x1]
-; SVE-NEXT: ptrue p0.s, vl2
-; SVE-NEXT: fcvtxn v1.2s, v1.2d
-; SVE-NEXT: fcvtxn v0.2s, v0.2d
-; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
-; SVE-NEXT: ptrue p0.s
-; SVE-NEXT: ldr d1, [x0]
-; SVE-NEXT: and z1.h, z1.h, #0x7fff
-; SVE-NEXT: fcvt z0.h, p0/m, z0.s
-; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
+; SVE-NEXT: sub sp, sp, #16
+; SVE-NEXT: .cfi_def_cfa_offset 16
+; SVE-NEXT: ldp q1, q0, [x1]
+; SVE-NEXT: ldr d4, [x0]
+; SVE-NEXT: and z4.h, z4.h, #0x7fff
+; SVE-NEXT: mov z2.d, z0.d[1]
+; SVE-NEXT: mov z3.d, z1.d[1]
+; SVE-NEXT: fcvt h0, d0
+; SVE-NEXT: fcvt h1, d1
+; SVE-NEXT: fcvt h2, d2
+; SVE-NEXT: fcvt h3, d3
+; SVE-NEXT: str h0, [sp, #12]
+; SVE-NEXT: str h1, [sp, #8]
+; SVE-NEXT: str h2, [sp, #14]
+; SVE-NEXT: str h3, [sp, #10]
+; SVE-NEXT: ldr d0, [sp, #8]
; SVE-NEXT: and z0.h, z0.h, #0x8000
-; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: orr z0.d, z4.d, z0.d
; SVE-NEXT: str d0, [x0]
+; SVE-NEXT: add sp, sp, #16
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f64:
; SVE2: // %bb.0:
; SVE2-NEXT: ldp q0, q1, [x1]
-; SVE2-NEXT: ptrue p0.s, vl2
+; SVE2-NEXT: ptrue p0.d, vl2
; SVE2-NEXT: ldr d2, [x0]
-; SVE2-NEXT: fcvtxn v1.2s, v1.2d
-; SVE2-NEXT: fcvtxn v0.2s, v0.2d
+; SVE2-NEXT: fcvtx z1.s, p0/m, z1.d
+; SVE2-NEXT: fcvtx z0.s, p0/m, z0.d
+; SVE2-NEXT: ptrue p0.s, vl2
; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
; SVE2-NEXT: ptrue p0.s
; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
More information about the llvm-commits
mailing list