[llvm] [IR] Add `llvm.sincos` intrinsic (PR #109825)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 11 10:14:26 PDT 2024


================
@@ -0,0 +1,936 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=aarch64-none-linux < %s | FileCheck -check-prefixes=NO-LIBCALL %s
+
+; Scalar half sincos with both results returned.
+; gnu-linux triple: the input is converted to float, a single sincosf call
+; receives two stack addresses in x0/x1, and both slots are reloaded and
+; converted back to half (x0 slot -> h0, x1 slot -> h1).
+; none-linux triple: separate sinf and cosf calls are expected, with the
+; converted input kept in s8 across the calls.
+define { half, half } @test_sincos_f16(half %a) {
+; CHECK-LABEL: test_sincos_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #8]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    fcvt h1, s1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    fcvt s8, h0
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h9, s0
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    fcvt h1, s1
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  ret { half, half } %result
+}
+
+; Scalar half sincos where only struct element 0 (the sin result) is used.
+; gnu-linux triple: sincosf is still called with both stack addresses, but
+; only the slot passed in x0 (sp+12) is reloaded.
+; none-linux triple: only a sinf call is expected; no cosf call appears.
+define half @test_sincos_f16_only_use_sin(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #12]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    fcvt s0, h0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  %result.0 = extractvalue { half, half } %result, 0
+  ret half %result.0
+}
+
+; Scalar half sincos where only struct element 1 (the cos result) is used.
+; gnu-linux triple: sincosf is still called with both stack addresses, but
+; only the slot passed in x1 (sp+8) is reloaded.
+; none-linux triple: only a cosf call is expected; no sinf call appears.
+define half @test_sincos_f16_only_use_cos(half %a) {
+; CHECK-LABEL: test_sincos_f16_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #8]
+; CHECK-NEXT:    fcvt h0, s0
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    fcvt s0, h0
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { half, half } @llvm.sincos.f16(half %a)
+  %result.1 = extractvalue { half, half } %result, 1
+  ret half %result.1
+}
+
+; Vector <2 x half> sincos with both result vectors returned.
+; gnu-linux triple: four scalar sincosf calls are expected (lanes h[1], h[0],
+; h[2], h[3] of the spilled input), each with its own pair of stack slots;
+; the x0-slot values are assembled into v0 and the x1-slot values into v1.
+; none-linux triple: four sinf calls followed by four cosf calls, with the
+; converted lane inputs kept in s8-s11 so they survive the calls.
+; NOTE(review): four lanes are processed although the IR type has two
+; elements - presumably the vector is widened during legalization; confirm.
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s2, s0, [sp, #32]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ldp s3, s1, [sp, #24]
+; CHECK-NEXT:    fcvt h4, s0
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fcvt h0, s1
+; CHECK-NEXT:    fcvt h1, s3
+; CHECK-NEXT:    ldp s5, s3, [sp, #40]
+; CHECK-NEXT:    fcvt h3, s3
+; CHECK-NEXT:    mov v0.h[1], v4.h[0]
+; CHECK-NEXT:    fcvt h4, s5
+; CHECK-NEXT:    mov v1.h[1], v2.h[0]
+; CHECK-NEXT:    ldp s5, s2, [sp, #56]
+; CHECK-NEXT:    mov v0.h[2], v3.h[0]
+; CHECK-NEXT:    fcvt h2, s2
+; CHECK-NEXT:    fcvt h3, s5
+; CHECK-NEXT:    mov v1.h[2], v4.h[0]
+; CHECK-NEXT:    mov v0.h[3], v2.h[0]
+; CHECK-NEXT:    mov v1.h[3], v3.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #80
+; NO-LIBCALL-NEXT:    stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 80
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    .cfi_offset b10, -40
+; NO-LIBCALL-NEXT:    .cfi_offset b11, -48
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov h1, v0.h[1]
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fcvt s8, h1
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    fcvt s9, h1
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[2]
+; NO-LIBCALL-NEXT:    fcvt s10, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[3]
+; NO-LIBCALL-NEXT:    fcvt s11, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT:    fmov s0, s11
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[3], v0.h[0]
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s10
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT:    fmov s0, s11
+; NO-LIBCALL-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    fcvt h2, s1
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT:    add sp, sp, #80
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+  ret { <2 x half>, <2 x half> } %result
+}
+
+; Vector <2 x half> sincos where only struct element 0 (sin) is returned.
+; gnu-linux triple: four scalar sincosf calls are still expected, but only
+; the x0 slots (sp+36, sp+28, sp+44, sp+60) are reloaded into v0.
+; none-linux triple: four sinf calls and no cosf calls, with the partially
+; built result vector spilled and reloaded around each call.
+define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #36]
+; CHECK-NEXT:    ldr s1, [sp, #28]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    fcvt h2, s0
+; CHECK-NEXT:    fcvt h0, s1
+; CHECK-NEXT:    ldr s1, [sp, #44]
+; CHECK-NEXT:    fcvt h1, s1
+; CHECK-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-NEXT:    ldr s2, [sp, #60]
+; CHECK-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-NEXT:    fcvt h1, s2
+; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #48
+; NO-LIBCALL-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov h1, v0.h[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fcvt s0, h1
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    fcvt s1, h1
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s1
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h2, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[2]
+; NO-LIBCALL-NEXT:    fcvt s0, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v2.h[1], v1.h[0]
+; NO-LIBCALL-NEXT:    str q2, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h2, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[3]
+; NO-LIBCALL-NEXT:    fcvt s0, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[2], v2.h[0]
+; NO-LIBCALL-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fcvt h1, s0
+; NO-LIBCALL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.h[3], v1.h[0]
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    add sp, sp, #48
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+  %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0
+  ret <2 x half> %result.0
+}
+
+; Vector <2 x half> sincos where only struct element 1 (cos) is returned.
+; gnu-linux triple: four scalar sincosf calls are still expected, but only
+; the x1 slots (sp+32, sp+24, sp+40, sp+56) are reloaded into v0.
+; none-linux triple: four cosf calls and no sinf calls, with the partially
+; built result vector spilled and reloaded around each call.
+define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) {
+; CHECK-LABEL: test_sincos_v2f16_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h1, v0.h[1]
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    add x0, sp, #36
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    fcvt s0, h1
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    mov h0, v0.h[2]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #60
+; CHECK-NEXT:    add x1, sp, #56
+; CHECK-NEXT:    mov h0, v0.h[3]
+; CHECK-NEXT:    fcvt s0, h0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #32]
+; CHECK-NEXT:    ldr s1, [sp, #24]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    fcvt h2, s0
+; CHECK-NEXT:    fcvt h0, s1
+; CHECK-NEXT:    ldr s1, [sp, #40]
+; CHECK-NEXT:    fcvt h1, s1
+; CHECK-NEXT:    mov v0.h[1], v2.h[0]
+; CHECK-NEXT:    ldr s2, [sp, #56]
+; CHECK-NEXT:    mov v0.h[2], v1.h[0]
+; CHECK-NEXT:    fcvt h1, s2
+; CHECK-NEXT:    mov v0.h[3], v1.h[0]
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #48
+; NO-LIBCALL-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov h1, v0.h[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fcvt s0, h1
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h0, s0
+; NO-LIBCALL-NEXT:    fcvt s1, h1
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s1
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h2, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[2]
+; NO-LIBCALL-NEXT:    fcvt s0, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v2.h[1], v1.h[0]
+; NO-LIBCALL-NEXT:    str q2, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    fcvt h2, s0
+; NO-LIBCALL-NEXT:    mov h1, v1.h[3]
+; NO-LIBCALL-NEXT:    fcvt s0, h1
+; NO-LIBCALL-NEXT:    ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.h[2], v2.h[0]
+; NO-LIBCALL-NEXT:    str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fcvt h1, s0
+; NO-LIBCALL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.h[3], v1.h[0]
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    add sp, sp, #48
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
+  %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1
+  ret <2 x half> %result.1
+}
+
+; Scalar float sincos with both results returned.
+; gnu-linux triple: one sincosf call with stack addresses in x0 (sp+12) and
+; x1 (sp+8); a single ldp reloads both slots into s1/s0.
+; none-linux triple: sinf then cosf, with the input saved in s8 and the
+; first result held in s9 across the second call.
+define { float, float } @test_sincos_f32(float %a) {
+; CHECK-LABEL: test_sincos_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    fmov s8, s0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    fmov s9, s0
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    fmov s0, s9
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { float, float } @llvm.sincos.f32(float %a)
+  ret { float, float } %result
+}
+
+; Scalar float sincos where only struct element 0 (sin) is returned.
+; gnu-linux triple: sincosf is still called; only the x0 slot (sp+12) is
+; reloaded.
+; none-linux triple: the whole function is expected to be a bare branch to
+; sinf, with no frame setup.
+define float @test_sincos_f32_only_use_sin(float %a) {
+; CHECK-LABEL: test_sincos_f32_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #12]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    b sinf
+  %result = call { float, float } @llvm.sincos.f32(float %a)
+  %result.0 = extractvalue { float, float } %result, 0
+  ret float %result.0
+}
+
+; Scalar float sincos where only struct element 1 (cos) is returned.
+; gnu-linux triple: sincosf is still called; only the x1 slot (sp+8) is
+; reloaded.
+; none-linux triple: the whole function is expected to be a bare branch to
+; cosf, with no frame setup.
+define float @test_sincos_f32_only_use_cos(float %a) {
+; CHECK-LABEL: test_sincos_f32_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #12
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    b cosf
+  %result = call { float, float } @llvm.sincos.f32(float %a)
+  %result.1 = extractvalue { float, float } %result, 1
+  ret float %result.1
+}
+
+; Vector <2 x float> sincos with both result vectors returned.
+; gnu-linux triple: two scalar sincosf calls (lane 0, then lane 1). The
+; lane-1 slot addresses are also kept in x19/x20 so the results can be
+; inserted with ld1 lane loads after the lane-0 slots are read with ldp.
+; none-linux triple: sinf for lane 1 then lane 0, followed by cosf for lane 1
+; then lane 0; lane 1 of the input is kept in s8 across the calls.
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    add x0, sp, #44
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #28
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldp s1, s0, [sp, #40]
+; CHECK-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.s }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #64
+; NO-LIBCALL-NEXT:    str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -8
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov s8, v0.s[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov s0, s8
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    fmov s1, s0
+; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    mov v1.s[1], v2.s[0]
+; NO-LIBCALL-NEXT:    // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT:    add sp, sp, #64
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+  ret { <2 x float>, <2 x float> } %result
+}
+
+; Vector <2 x float> sincos where only struct element 0 (sin) is returned.
+; gnu-linux triple: two scalar sincosf calls; only the x0 slots are read
+; back (sp+28 directly, sp+20 through x19 with an ld1 lane load).
+; none-linux triple: two sinf calls (lane 1, then lane 0) and no cosf calls.
+define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #20
+; CHECK-NEXT:    add x1, sp, #16
+; CHECK-NEXT:    add x19, sp, #20
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #28]
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #48
+; NO-LIBCALL-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    mov s0, v0.s[1]
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl sinf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    add sp, sp, #48
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+  %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0
+  ret <2 x float> %result.0
+}
+
+; Vector <2 x float> sincos where only struct element 1 (cos) is returned.
+; gnu-linux triple: two scalar sincosf calls; only the x1 slots are read
+; back (sp+24 directly, sp+16 through x19 with an ld1 lane load).
+; none-linux triple: two cosf calls (lane 1, then lane 0) and no sinf calls.
+define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) {
+; CHECK-LABEL: test_sincos_v2f32_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #48
+; CHECK-NEXT:    stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    add x0, sp, #28
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #20
+; CHECK-NEXT:    add x1, sp, #16
+; CHECK-NEXT:    add x19, sp, #16
+; CHECK-NEXT:    mov s0, v0.s[1]
+; CHECK-NEXT:    bl sincosf
+; CHECK-NEXT:    ldr s0, [sp, #24]
+; CHECK-NEXT:    ld1 { v0.s }[1], [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    add sp, sp, #48
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #48
+; NO-LIBCALL-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    mov s0, v0.s[1]
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT:    bl cosf
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    add sp, sp, #48
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a)
+  %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1
+  ret <2 x float> %result.1
+}
+
+; Scalar double sincos with both results returned.
+; gnu-linux triple: one sincos call with stack addresses in x0 (sp+24) and
+; x1 (sp+8); d0 is reloaded from the x0 slot and d1 from the x1 slot.
+; none-linux triple: sin then cos, with the input saved in d8 and the first
+; result held in d9 across the second call.
+define { double, double } @test_sincos_f64(double %a) {
+; CHECK-LABEL: test_sincos_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #24]
+; CHECK-NEXT:    ldr d1, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -24
+; NO-LIBCALL-NEXT:    .cfi_offset b9, -32
+; NO-LIBCALL-NEXT:    fmov d8, d0
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    fmov d9, d0
+; NO-LIBCALL-NEXT:    fmov d0, d8
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    fmov d1, d0
+; NO-LIBCALL-NEXT:    fmov d0, d9
+; NO-LIBCALL-NEXT:    ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    ret
+  %result = call { double, double } @llvm.sincos.f64(double %a)
+  ret { double, double } %result
+}
+
+; Scalar double sincos where only struct element 0 (sin) is returned.
+; gnu-linux triple: sincos is still called; only the x0 slot (sp+24) is
+; reloaded.
+; none-linux triple: the whole function is expected to be a bare branch to
+; sin, with no frame setup.
+define double @test_sincos_f64_only_use_sin(double %a) {
+; CHECK-LABEL: test_sincos_f64_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #24]
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    b sin
+  %result = call { double, double } @llvm.sincos.f64(double %a)
+  %result.0 = extractvalue { double, double } %result, 0
+  ret double %result.0
+}
+
+; Scalar double sincos where only struct element 1 (cos) is returned.
+; gnu-linux triple: sincos is still called; only the x1 slot (sp+8) is
+; reloaded.
+; none-linux triple: the whole function is expected to be a bare branch to
+; cos, with no frame setup.
+define double @test_sincos_f64_only_use_cos(double %a) {
+; CHECK-LABEL: test_sincos_f64_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #8
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #8]
+; CHECK-NEXT:    ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    b cos
+  %result = call { double, double } @llvm.sincos.f64(double %a)
+  %result.1 = extractvalue { double, double } %result, 1
+  ret double %result.1
+}
+
+; Returns both <2 x double> results of llvm.sincos.v2f64.
+; The aarch64-gnu-linux run expects one sincos call per lane (lane 1 is moved
+; into d0 for the second call), with lane 1 of each result vector inserted by
+; ld1 from the second call's output slots. The aarch64-none-linux run expects
+; two sin calls followed by two cos calls, with d8 holding lane 1 of the input
+; across the calls.
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #80
+; CHECK-NEXT:    str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w20, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    add x0, sp, #56
+; CHECK-NEXT:    add x1, sp, #40
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #32
+; CHECK-NEXT:    add x1, sp, #24
+; CHECK-NEXT:    add x19, sp, #32
+; CHECK-NEXT:    add x20, sp, #24
+; CHECK-NEXT:    mov d0, v0.d[1]
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #56]
+; CHECK-NEXT:    ldr d1, [sp, #40]
+; CHECK-NEXT:    ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT:    ld1 { v0.d }[1], [x19]
+; CHECK-NEXT:    ld1 { v1.d }[1], [x20]
+; CHECK-NEXT:    ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #80
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #64
+; NO-LIBCALL-NEXT:    str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -8
+; NO-LIBCALL-NEXT:    .cfi_offset b8, -16
+; NO-LIBCALL-NEXT:    mov d8, v0.d[1]
+; NO-LIBCALL-NEXT:    str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov d0, d8
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    fmov d0, d8
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    fmov d1, d0
+; NO-LIBCALL-NEXT:    ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v1.d[1], v2.d[0]
+; NO-LIBCALL-NEXT:    add sp, sp, #64
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+  ret { <2 x double>, <2 x double> } %result
+}
+
+; Returns only element 0 of the llvm.sincos.v2f64 result.
+; The aarch64-gnu-linux run still expects one sincos call per lane, but only
+; the slots whose addresses were passed in x0 (sp+40 and sp+24) are read back.
+; The aarch64-none-linux run expects two sin calls and no cos call.
+define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64_only_use_sin:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #40
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #16
+; CHECK-NEXT:    add x19, sp, #24
+; CHECK-NEXT:    mov d0, v0.d[1]
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #40]
+; CHECK-NEXT:    ld1 { v0.d }[1], [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_sin:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #48
+; NO-LIBCALL-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    mov d0, v0.d[1]
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    bl sin
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT:    add sp, sp, #48
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+  %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0
+  ret <2 x double> %result.0
+}
+
+; Returns only element 1 of the llvm.sincos.v2f64 result.
+; The aarch64-gnu-linux run still expects one sincos call per lane, but only
+; the slots whose addresses were passed in x1 (sp+32 and sp+16) are read back.
+; The aarch64-none-linux run expects two cos calls and no sin call.
+define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) {
+; CHECK-LABEL: test_sincos_v2f64_only_use_cos:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    .cfi_offset w19, -8
+; CHECK-NEXT:    .cfi_offset w30, -16
+; CHECK-NEXT:    add x0, sp, #40
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT:    add x0, sp, #24
+; CHECK-NEXT:    add x1, sp, #16
+; CHECK-NEXT:    add x19, sp, #16
+; CHECK-NEXT:    mov d0, v0.d[1]
+; CHECK-NEXT:    bl sincos
+; CHECK-NEXT:    ldr d0, [sp, #32]
+; CHECK-NEXT:    ld1 { v0.d }[1], [x19]
+; CHECK-NEXT:    ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_cos:
+; NO-LIBCALL:       // %bb.0:
+; NO-LIBCALL-NEXT:    sub sp, sp, #48
+; NO-LIBCALL-NEXT:    str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT:    .cfi_offset w30, -16
+; NO-LIBCALL-NEXT:    str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    mov d0, v0.d[1]
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT:    ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT:    bl cos
+; NO-LIBCALL-NEXT:    ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT:    // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT:    ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT:    mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT:    add sp, sp, #48
+; NO-LIBCALL-NEXT:    ret
+  %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a)
+  %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1
+  ret <2 x double> %result.1
+}
----------------
arsenm wrote:

This should have a DAG test with flags. I'm not sure there's a good way to see a manifested codegen difference (I guess it could have a user that depends on an upstream nnan flag).

https://github.com/llvm/llvm-project/pull/109825


More information about the llvm-commits mailing list