[llvm] 3a32590 - [AArch64] Avoid using NEON FCVTXN in Streaming-SVE mode. (#91981)
via llvm-commits llvm-commits at lists.llvm.org
Fri May 17 06:11:33 PDT 2024
Author: Sander de Smalen
Date: 2024-05-17T14:11:28+01:00
New Revision: 3a32590f25482e283a79dea1f313a226a6dd392f
URL: https://github.com/llvm/llvm-project/commit/3a32590f25482e283a79dea1f313a226a6dd392f
DIFF: https://github.com/llvm/llvm-project/commit/3a32590f25482e283a79dea1f313a226a6dd392f.diff
LOG: [AArch64] Avoid using NEON FCVTXN in Streaming-SVE mode. (#91981)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6223c211b33b6..e31a27e9428e8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -19501,7 +19501,8 @@ static SDValue performBuildVectorCombine(SDNode *N,
SDLoc DL(N);
EVT VT = N->getValueType(0);
- if (VT == MVT::v4f16 || VT == MVT::v4bf16) {
+ if (DAG.getSubtarget<AArch64Subtarget>().isNeonAvailable() &&
+ (VT == MVT::v4f16 || VT == MVT::v4bf16)) {
SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1),
Elt2 = N->getOperand(2), Elt3 = N->getOperand(3);
if (Elt0->getOpcode() == ISD::FP_ROUND &&
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 0d6675def8b52..f017eead92cff 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -427,35 +427,49 @@ define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
define void @test_copysign_v4f16_v4f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_v4f16_v4f64:
; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x1]
-; SVE-NEXT: ptrue p0.s, vl2
-; SVE-NEXT: fcvtxn v1.2s, v1.2d
-; SVE-NEXT: fcvtxn v0.2s, v0.2d
-; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
-; SVE-NEXT: ptrue p0.s
-; SVE-NEXT: ldr d1, [x0]
-; SVE-NEXT: and z1.h, z1.h, #0x7fff
-; SVE-NEXT: fcvt z0.h, p0/m, z0.s
-; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
+; SVE-NEXT: sub sp, sp, #16
+; SVE-NEXT: .cfi_def_cfa_offset 16
+; SVE-NEXT: ldp q1, q0, [x1]
+; SVE-NEXT: ldr d4, [x0]
+; SVE-NEXT: and z4.h, z4.h, #0x7fff
+; SVE-NEXT: mov z2.d, z0.d[1]
+; SVE-NEXT: mov z3.d, z1.d[1]
+; SVE-NEXT: fcvt h0, d0
+; SVE-NEXT: fcvt h1, d1
+; SVE-NEXT: fcvt h2, d2
+; SVE-NEXT: fcvt h3, d3
+; SVE-NEXT: str h0, [sp, #12]
+; SVE-NEXT: str h1, [sp, #8]
+; SVE-NEXT: str h2, [sp, #14]
+; SVE-NEXT: str h3, [sp, #10]
+; SVE-NEXT: ldr d0, [sp, #8]
; SVE-NEXT: and z0.h, z0.h, #0x8000
-; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: orr z0.d, z4.d, z0.d
; SVE-NEXT: str d0, [x0]
+; SVE-NEXT: add sp, sp, #16
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_v4f16_v4f64:
; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x1]
-; SVE2-NEXT: ptrue p0.s, vl2
-; SVE2-NEXT: ldr d2, [x0]
-; SVE2-NEXT: fcvtxn v1.2s, v1.2d
-; SVE2-NEXT: fcvtxn v0.2s, v0.2d
-; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
-; SVE2-NEXT: ptrue p0.s
-; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
-; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
-; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
-; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
-; SVE2-NEXT: str d2, [x0]
+; SVE2-NEXT: sub sp, sp, #16
+; SVE2-NEXT: .cfi_def_cfa_offset 16
+; SVE2-NEXT: ldp q2, q1, [x1]
+; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr d5, [x0]
+; SVE2-NEXT: mov z3.d, z1.d[1]
+; SVE2-NEXT: mov z4.d, z2.d[1]
+; SVE2-NEXT: fcvt h1, d1
+; SVE2-NEXT: fcvt h2, d2
+; SVE2-NEXT: fcvt h3, d3
+; SVE2-NEXT: fcvt h4, d4
+; SVE2-NEXT: str h1, [sp, #12]
+; SVE2-NEXT: str h2, [sp, #8]
+; SVE2-NEXT: str h3, [sp, #14]
+; SVE2-NEXT: str h4, [sp, #10]
+; SVE2-NEXT: ldr d1, [sp, #8]
+; SVE2-NEXT: bsl z5.d, z5.d, z1.d, z0.d
+; SVE2-NEXT: str d5, [x0]
+; SVE2-NEXT: add sp, sp, #16
; SVE2-NEXT: ret
%a = load <4 x half>, ptr %ap
%b = load <4 x double>, ptr %bp
More information about the llvm-commits mailing list