[llvm] [AArch64] Fix SVE scalar fcopysign lowering without neon. (PR #129787)

David Sherwood via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 5 01:12:22 PST 2025


================
@@ -66,32 +55,40 @@ define void @test_copysign_f16(ptr %ap, ptr %bp) {
 define void @test_copysign_bf16(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_bf16:
 ; SVE:       // %bb.0:
-; SVE-NEXT:    adrp x8, .LCPI1_0
-; SVE-NEXT:    ldr h1, [x0]
-; SVE-NEXT:    ldr h2, [x1]
-; SVE-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
-; SVE-NEXT:    adrp x8, .LCPI1_1
-; SVE-NEXT:    ldr q4, [x8, :lo12:.LCPI1_1]
-; SVE-NEXT:    mov z3.d, z0.d
-; SVE-NEXT:    fmov s0, s1
-; SVE-NEXT:    fmov s3, s2
-; SVE-NEXT:    bif v0.16b, v3.16b, v4.16b
+; SVE-NEXT:    sub sp, sp, #16
+; SVE-NEXT:    .cfi_def_cfa_offset 16
+; SVE-NEXT:    ldr h0, [x0]
+; SVE-NEXT:    ldr h1, [x1]
+; SVE-NEXT:    fmov w8, s0
+; SVE-NEXT:    str h1, [sp, #12]
+; SVE-NEXT:    ldrb w9, [sp, #13]
+; SVE-NEXT:    and w8, w8, #0x7fff
+; SVE-NEXT:    tst w9, #0x80
+; SVE-NEXT:    fmov s0, w8
+; SVE-NEXT:    eor w8, w8, #0x8000
+; SVE-NEXT:    fmov s1, w8
+; SVE-NEXT:    fcsel h0, h1, h0, ne
----------------
david-arm wrote:

The bf16 copysign seems to generate legal code for streaming compatible functions. Is this why you didn't need to add the bf16 to the new code in LowerFCOPYSIGN? I just want to make sure there isn't still a bug for bf16.

https://github.com/llvm/llvm-project/pull/129787


More information about the llvm-commits mailing list