[llvm] [AArch64] Fix SVE scalar fcopysign lowering without neon. (PR #129787)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 5 01:12:22 PST 2025
================
@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
define void @test_copysign_bf16(ptr %ap, ptr %bp) {
; SVE-LABEL: test_copysign_bf16:
; SVE: // %bb.0:
-; SVE-NEXT: adrp x8, .LCPI1_0
-; SVE-NEXT: ldr h1, [x0]
-; SVE-NEXT: ldr h2, [x1]
-; SVE-NEXT: ldr q0, [x8, :lo12:.LCPI1_0]
-; SVE-NEXT: adrp x8, .LCPI1_1
-; SVE-NEXT: ldr q4, [x8, :lo12:.LCPI1_1]
-; SVE-NEXT: mov z3.d, z0.d
-; SVE-NEXT: fmov s0, s1
-; SVE-NEXT: fmov s3, s2
-; SVE-NEXT: bif v0.16b, v3.16b, v4.16b
+; SVE-NEXT: sub sp, sp, #16
+; SVE-NEXT: .cfi_def_cfa_offset 16
+; SVE-NEXT: ldr h0, [x0]
+; SVE-NEXT: ldr h1, [x1]
+; SVE-NEXT: fmov w8, s0
+; SVE-NEXT: str h1, [sp, #12]
+; SVE-NEXT: ldrb w9, [sp, #13]
+; SVE-NEXT: and w8, w8, #0x7fff
+; SVE-NEXT: tst w9, #0x80
+; SVE-NEXT: fmov s0, w8
+; SVE-NEXT: eor w8, w8, #0x8000
+; SVE-NEXT: fmov s1, w8
+; SVE-NEXT: fcsel h0, h1, h0, ne
----------------
david-arm wrote:
The bf16 copysign lowering seems to generate legal code for streaming-compatible functions. Is this why you didn't need to add bf16 to the new code in LowerFCOPYSIGN? I just want to make sure there isn't still a bug for bf16.
https://github.com/llvm/llvm-project/pull/129787
More information about the llvm-commits mailing list