[llvm] d4ab3df - [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 09:18:10 PST 2025
Author: David Green
Date: 2025-03-05T17:18:07Z
New Revision: d4ab3df320f9eebf11cc5fb600a0919f93678abe
URL: https://github.com/llvm/llvm-project/commit/d4ab3df320f9eebf11cc5fb600a0919f93678abe
DIFF: https://github.com/llvm/llvm-project/commit/d4ab3df320f9eebf11cc5fb600a0919f93678abe.diff
LOG: [AArch64] Fix SVE scalar fcopysign lowering without neon. (#129787)
Without this we can try to generate invalid instructions or create
illegal types. This patch generates an SVE fcopysign instead and uses its
lowering. BF16 is left out for the moment as it doesn't lower
successfully (but could use the same code as fp16).
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1f05230a45042..ec8b18fd6d0dd 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10686,6 +10686,25 @@ SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
return convertFromScalableVector(DAG, VT, Res);
}
+ // With SVE, but without Neon, extend the scalars to scalable vectors and use
+ // a SVE FCOPYSIGN.
+ if (!VT.isVector() && !Subtarget->isNeonAvailable() &&
+ Subtarget->isSVEorStreamingSVEAvailable()) {
+ if (VT != MVT::f16 && VT != MVT::f32 && VT != MVT::f64)
+ return SDValue();
+ EVT SVT = getPackedSVEVectorVT(VT);
+
+ SDValue Ins1 =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In1,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue Ins2 =
+ DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, SVT, DAG.getUNDEF(SVT), In2,
+ DAG.getConstant(0, DL, MVT::i64));
+ SDValue FCS = DAG.getNode(ISD::FCOPYSIGN, DL, SVT, Ins1, Ins2);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, FCS,
+ DAG.getConstant(0, DL, MVT::i64));
+ }
+
auto BitCast = [this](EVT VT, SDValue Op, SelectionDAG &DAG) {
if (VT.isScalableVector())
return getSVESafeBitCast(VT, Op, DAG);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
index 238c124b7cb06..79921e25caf53 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -11,32 +11,21 @@ target triple = "aarch64-unknown-linux-gnu"
define void @test_copysign_f16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_f16:
; SVE: // %bb.0:
-; SVE-NEXT: adrp x8, .LCPI0_0
+; SVE-NEXT: ldr h0, [x1]
; SVE-NEXT: ldr h1, [x0]
-; SVE-NEXT: ldr h2, [x1]
-; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
-; SVE-NEXT: adrp x8, .LCPI0_1
-; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
-; SVE-NEXT: mov z3.d, z0.d
-; SVE-NEXT: fmov s0, s1
-; SVE-NEXT: fmov s3, s2
-; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
+; SVE-NEXT: and z0.h, z0.h, #0x8000
+; SVE-NEXT: and z1.h, z1.h, #0x7fff
+; SVE-NEXT: orr z0.d, z1.d, z0.d
; SVE-NEXT: str h0, [x0]
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_f16:
; SVE2: // %bb.0:
-; SVE2-NEXT: adrp x8, .LCPI0_0
-; SVE2-NEXT: ldr h1, [x0]
-; SVE2-NEXT: ldr h2, [x1]
-; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI0_0]
-; SVE2-NEXT: adrp x8, .LCPI0_1
-; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI0_1]
-; SVE2-NEXT: mov z3.d, z0.d
-; SVE2-NEXT: fmov s0, s1
-; SVE2-NEXT: fmov s3, s2
-; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
-; SVE2-NEXT: str h0, [x0]
+; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr h1, [x1]
+; SVE2-NEXT: ldr h2, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: str h2, [x0]
; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: test_copysign_f16:
@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
define void @test_copysign_bf16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_bf16:
; SVE: // %bb.0:
-; SVE-NEXT: adrp x8, .LCPI1_0
-; SVE-NEXT: ldr h1, [x0]
-; SVE-NEXT: ldr h2, [x1]
-; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
-; SVE-NEXT: adrp x8, .LCPI1_1
-; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
-; SVE-NEXT: mov z3.d, z0.d
-; SVE-NEXT: fmov s0, s1
-; SVE-NEXT: fmov s3, s2
-; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
+; SVE-NEXT: sub sp, sp, #16
+; SVE-NEXT: .cfi_def_cfa_offset 16
+; SVE-NEXT: ldr h0, [x0]
+; SVE-NEXT: ldr h1, [x1]
+; SVE-NEXT: fmov w8, s0
+; SVE-NEXT: str h1, [sp, #12]
+; SVE-NEXT: ldrb w9, [sp, #13]
+; SVE-NEXT: and w8, w8, #0x7fff
+; SVE-NEXT: tst w9, #0x80
+; SVE-NEXT: fmov s0, w8
+; SVE-NEXT: eor w8, w8, #0x8000
+; SVE-NEXT: fmov s1, w8
+; SVE-NEXT: fcsel h0, h1, h0, ne
; SVE-NEXT: str h0, [x0]
+; SVE-NEXT: add sp, sp, #16
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_bf16:
; SVE2: // %bb.0:
-; SVE2-NEXT: adrp x8, .LCPI1_0
-; SVE2-NEXT: ldr h1, [x0]
-; SVE2-NEXT: ldr h2, [x1]
-; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
-; SVE2-NEXT: adrp x8, .LCPI1_1
-; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
-; SVE2-NEXT: mov z3.d, z0.d
-; SVE2-NEXT: fmov s0, s1
-; SVE2-NEXT: fmov s3, s2
-; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
+; SVE2-NEXT: sub sp, sp, #16
+; SVE2-NEXT: .cfi_def_cfa_offset 16
+; SVE2-NEXT: ldr h0, [x0]
+; SVE2-NEXT: ldr h1, [x1]
+; SVE2-NEXT: fmov w8, s0
+; SVE2-NEXT: str h1, [sp, #12]
+; SVE2-NEXT: ldrb w9, [sp, #13]
+; SVE2-NEXT: and w8, w8, #0x7fff
+; SVE2-NEXT: tst w9, #0x80
+; SVE2-NEXT: fmov s0, w8
+; SVE2-NEXT: eor w8, w8, #0x8000
+; SVE2-NEXT: fmov s1, w8
+; SVE2-NEXT: fcsel h0, h1, h0, ne
; SVE2-NEXT: str h0, [x0]
+; SVE2-NEXT: add sp, sp, #16
; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: test_copysign_bf16:
@@ -139,32 +136,21 @@ define void @test_copysign_bf16(ptr %ap, ptr %bp) {
define void @test_copysign_f32(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_f32:
; SVE: // %bb.0:
-; SVE-NEXT: adrp x8, .LCPI2_0
+; SVE-NEXT: ldr s0, [x1]
; SVE-NEXT: ldr s1, [x0]
-; SVE-NEXT: ldr s2, [x1]
-; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
-; SVE-NEXT: adrp x8, .LCPI2_1
-; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
-; SVE-NEXT: mov z3.d, z0.d
-; SVE-NEXT: fmov s0, s1
-; SVE-NEXT: fmov s3, s2
-; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
+; SVE-NEXT: and z0.s, z0.s, #0x80000000
+; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
+; SVE-NEXT: orr z0.d, z1.d, z0.d
; SVE-NEXT: str s0, [x0]
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_f32:
; SVE2: // %bb.0:
-; SVE2-NEXT: adrp x8, .LCPI2_0
-; SVE2-NEXT: ldr s1, [x0]
-; SVE2-NEXT: ldr s2, [x1]
-; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI2_0]
-; SVE2-NEXT: adrp x8, .LCPI2_1
-; SVE2-NEXT: ldr q4, [x8, :lo12:.LCPI2_1]
-; SVE2-NEXT: mov z3.d, z0.d
-; SVE2-NEXT: fmov s0, s1
-; SVE2-NEXT: fmov s3, s2
-; SVE2-NEXT: bif v0.16b, v3.16b, v4.16b
-; SVE2-NEXT: str s0, [x0]
+; SVE2-NEXT: mov z0.s, #0x7fffffff
+; SVE2-NEXT: ldr s1, [x1]
+; SVE2-NEXT: ldr s2, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: str s2, [x0]
; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: test_copysign_f32:
@@ -187,36 +173,21 @@ define void @test_copysign_f32(ptr %ap, ptr %bp) {
define void @test_copysign_f64(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_f64:
; SVE: // %bb.0:
-; SVE-NEXT: adrp x8, .LCPI3_1
-; SVE-NEXT: ptrue p0.d, vl2
-; SVE-NEXT: ldr d2, [x0]
-; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
-; SVE-NEXT: adrp x8, .LCPI3_0
-; SVE-NEXT: ldr d3, [x1]
-; SVE-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; SVE-NEXT: fneg z0.d, p0/m, z0.d
-; SVE-NEXT: mov z4.d, z1.d
-; SVE-NEXT: fmov d1, d2
-; SVE-NEXT: fmov d4, d3
-; SVE-NEXT: bsl v0.16b, v1.16b, v4.16b
+; SVE-NEXT: ldr d0, [x1]
+; SVE-NEXT: ldr d1, [x0]
+; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
+; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
+; SVE-NEXT: orr z0.d, z1.d, z0.d
; SVE-NEXT: str d0, [x0]
; SVE-NEXT: ret
;
; SVE2-LABEL: test_copysign_f64:
; SVE2: // %bb.0:
-; SVE2-NEXT: adrp x8, .LCPI3_1
-; SVE2-NEXT: ptrue p0.d, vl2
+; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
+; SVE2-NEXT: ldr d1, [x1]
; SVE2-NEXT: ldr d2, [x0]
-; SVE2-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
-; SVE2-NEXT: adrp x8, .LCPI3_0
-; SVE2-NEXT: ldr d3, [x1]
-; SVE2-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; SVE2-NEXT: fneg z0.d, p0/m, z0.d
-; SVE2-NEXT: mov z4.d, z1.d
-; SVE2-NEXT: fmov d1, d2
-; SVE2-NEXT: fmov d4, d3
-; SVE2-NEXT: bsl v0.16b, v1.16b, v4.16b
-; SVE2-NEXT: str d0, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: str d2, [x0]
; SVE2-NEXT: ret
;
; NONEON-NOSVE-LABEL: test_copysign_f64:
More information about the llvm-commits
mailing list