[llvm] [IR] Add `llvm.sincos` intrinsic (PR #109825)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 12:44:10 PDT 2024
================
@@ -0,0 +1,936 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-gnu-linux < %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=aarch64-none-linux < %s | FileCheck -check-prefixes=NO-LIBCALL %s
+
+define { half, half } @test_sincos_f16(half %a) { ; scalar half input, both elements of the sincos result are returned
+; CHECK-LABEL: test_sincos_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: fcvt s8, h0
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h9, s0
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: fcvt h1, s1
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a) ; the CHECK prefix expects one sincosf call on the fcvt-widened input, the NO-LIBCALL prefix expects separate sinf and cosf calls
+ ret { half, half } %result ; return the whole pair so both elements are used
+}
+
+define half @test_sincos_f16_only_use_sin(half %a) { ; scalar half input, only element 0 of the sincos result is returned
+; CHECK-LABEL: test_sincos_f16_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: fcvt s0, h0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a) ; the CHECK prefix still expects a sincosf call, the NO-LIBCALL prefix expects a lone sinf call
+ %result.0 = extractvalue { half, half } %result, 0 ; element 0 is the value the NO-LIBCALL checks obtain from sinf
+ ret half %result.0 ; element 1 is never read in this function
+}
+
+define half @test_sincos_f16_only_use_cos(half %a) { ; scalar half input, only element 1 of the sincos result is returned
+; CHECK-LABEL: test_sincos_f16_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: fcvt h0, s0
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f16_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 16
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: fcvt s0, h0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { half, half } @llvm.sincos.f16(half %a) ; the CHECK prefix still expects a sincosf call, the NO-LIBCALL prefix expects a lone cosf call
+ %result.1 = extractvalue { half, half } %result, 1 ; element 1 is the value the NO-LIBCALL checks obtain from cosf
+ ret half %result.1 ; element 0 is never read in this function
+}
+
+define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) { ; half vector input, both result vectors are returned
+; CHECK-LABEL: test_sincos_v2f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s2, s0, [sp, #32]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ldp s3, s1, [sp, #24]
+; CHECK-NEXT: fcvt h4, s0
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: fcvt h1, s3
+; CHECK-NEXT: ldp s5, s3, [sp, #40]
+; CHECK-NEXT: fcvt h3, s3
+; CHECK-NEXT: mov v0.h[1], v4.h[0]
+; CHECK-NEXT: fcvt h4, s5
+; CHECK-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-NEXT: ldp s5, s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v3.h[0]
+; CHECK-NEXT: fcvt h2, s2
+; CHECK-NEXT: fcvt h3, s5
+; CHECK-NEXT: mov v1.h[2], v4.h[0]
+; CHECK-NEXT: mov v0.h[3], v2.h[0]
+; CHECK-NEXT: mov v1.h[3], v3.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #80
+; NO-LIBCALL-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 80
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: .cfi_offset b10, -40
+; NO-LIBCALL-NEXT: .cfi_offset b11, -48
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s8, h1
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s9, h1
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s10, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s11, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s10
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v0.h[0]
+; NO-LIBCALL-NEXT: fmov s0, s11
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: fcvt h2, s1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[3], v2.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT: add sp, sp, #80
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) ; the checks expect only scalar calls on h lanes 0 to 3 of the input register, sincosf for the CHECK prefix, sinf and cosf for the NO-LIBCALL prefix
+ ret { <2 x half>, <2 x half> } %result ; return both vectors so both elements are used
+}
+
+define <2 x half> @test_sincos_v2f16_only_use_sin(<2 x half> %a) { ; half vector input, only element 0 of the sincos result is returned
+; CHECK-LABEL: test_sincos_v2f16_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #36]
+; CHECK-NEXT: ldr s1, [sp, #28]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: ldr s1, [sp, #44]
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-NEXT: ldr s2, [sp, #60]
+; CHECK-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-NEXT: fcvt h1, s2
+; CHECK-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s1, h1
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s1
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0]
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fcvt h1, s0
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) ; the CHECK prefix still expects scalar sincosf calls, the NO-LIBCALL prefix expects scalar sinf calls and no cosf
+ %result.0 = extractvalue { <2 x half>, <2 x half> } %result, 0 ; element 0 is the vector the NO-LIBCALL checks build from sinf results
+ ret <2 x half> %result.0 ; element 1 is never read in this function
+}
+
+define <2 x half> @test_sincos_v2f16_only_use_cos(<2 x half> %a) { ; half vector input, only element 1 of the sincos result is returned
+; CHECK-LABEL: test_sincos_v2f16_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: mov h1, v0.h[1]
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: add x0, sp, #36
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: fcvt s0, h1
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #60
+; CHECK-NEXT: add x1, sp, #56
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: fcvt s0, h0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #32]
+; CHECK-NEXT: ldr s1, [sp, #24]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: fcvt h2, s0
+; CHECK-NEXT: fcvt h0, s1
+; CHECK-NEXT: ldr s1, [sp, #40]
+; CHECK-NEXT: fcvt h1, s1
+; CHECK-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-NEXT: ldr s2, [sp, #56]
+; CHECK-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-NEXT: fcvt h1, s2
+; CHECK-NEXT: mov v0.h[3], v1.h[0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f16_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov h1, v0.h[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h0, s0
+; NO-LIBCALL-NEXT: fcvt s1, h1
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s1
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[2]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v2.h[1], v1.h[0]
+; NO-LIBCALL-NEXT: str q2, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: fcvt h2, s0
+; NO-LIBCALL-NEXT: mov h1, v1.h[3]
+; NO-LIBCALL-NEXT: fcvt s0, h1
+; NO-LIBCALL-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.h[2], v2.h[0]
+; NO-LIBCALL-NEXT: str q1, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fcvt h1, s0
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.h[3], v1.h[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a) ; the CHECK prefix still expects scalar sincosf calls, the NO-LIBCALL prefix expects scalar cosf calls and no sinf
+ %result.1 = extractvalue { <2 x half>, <2 x half> } %result, 1 ; element 1 is the vector the NO-LIBCALL checks build from cosf results
+ ret <2 x half> %result.1 ; element 0 is never read in this function
+}
+
+define { float, float } @test_sincos_f32(float %a) { ; scalar float input, both elements of the sincos result are returned
+; CHECK-LABEL: test_sincos_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: fmov s8, s0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: fmov s9, s0
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: fmov s0, s9
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { float, float } @llvm.sincos.f32(float %a) ; the CHECK prefix expects one sincosf call writing to two stack slots, the NO-LIBCALL prefix expects separate sinf and cosf calls
+ ret { float, float } %result ; return the whole pair so both elements are used
+}
+
+define float @test_sincos_f32_only_use_sin(float %a) { ; scalar float input, only element 0 of the sincos result is returned
+; CHECK-LABEL: test_sincos_f32_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #12]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b sinf
+ %result = call { float, float } @llvm.sincos.f32(float %a) ; the CHECK prefix still expects a sincosf call, the NO-LIBCALL prefix expects nothing but a branch to sinf
+ %result.0 = extractvalue { float, float } %result, 0 ; element 0 is the value the NO-LIBCALL checks obtain from sinf
+ ret float %result.0 ; element 1 is never read in this function
+}
+
+define float @test_sincos_f32_only_use_cos(float %a) { ; scalar float input, only element 1 of the sincos result is returned
+; CHECK-LABEL: test_sincos_f32_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #12
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f32_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b cosf
+ %result = call { float, float } @llvm.sincos.f32(float %a) ; the CHECK prefix still expects a sincosf call, the NO-LIBCALL prefix expects nothing but a branch to cosf
+ %result.1 = extractvalue { float, float } %result, 1 ; element 1 is the value the NO-LIBCALL checks obtain from cosf
+ ret float %result.1 ; element 0 is never read in this function
+}
+
+define { <2 x float>, <2 x float> } @test_sincos_v2f32(<2 x float> %a) { ; two-lane float vector input, both result vectors are returned
+; CHECK-LABEL: test_sincos_v2f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #44
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #28
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldp s1, s0, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ld1 { v1.s }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #64
+; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT: .cfi_offset w30, -8
+; NO-LIBCALL-NEXT: .cfi_offset b8, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov s8, v0.s[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov s0, s8
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: fmov s1, s0
+; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: mov v1.s[1], v2.s[0]
+; NO-LIBCALL-NEXT: // kill: def $d1 killed $d1 killed $q1
+; NO-LIBCALL-NEXT: add sp, sp, #64
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) ; the checks expect only scalar calls per lane, sincosf for the CHECK prefix, sinf and cosf for the NO-LIBCALL prefix
+ ret { <2 x float>, <2 x float> } %result ; return both vectors so both elements are used
+}
+
+define <2 x float> @test_sincos_v2f32_only_use_sin(<2 x float> %a) { ; two-lane float vector input, only element 0 of the sincos result is returned
+; CHECK-LABEL: test_sincos_v2f32_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #20
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #28]
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov s0, v0.s[1]
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl sinf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) ; the CHECK prefix still expects scalar sincosf calls, the NO-LIBCALL prefix expects scalar sinf calls and no cosf
+ %result.0 = extractvalue { <2 x float>, <2 x float> } %result, 0 ; element 0 is the vector the NO-LIBCALL checks build from sinf results
+ ret <2 x float> %result.0 ; element 1 is never read in this function
+}
+
+define <2 x float> @test_sincos_v2f32_only_use_cos(<2 x float> %a) { ; two-lane float vector input, only element 1 of the sincos result is returned
+; CHECK-LABEL: test_sincos_v2f32_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x30, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: add x0, sp, #28
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #20
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #16
+; CHECK-NEXT: mov s0, v0.s[1]
+; CHECK-NEXT: bl sincosf
+; CHECK-NEXT: ldr s0, [sp, #24]
+; CHECK-NEXT: ld1 { v0.s }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f32_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov s0, v0.s[1]
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 killed $q0
+; NO-LIBCALL-NEXT: bl cosf
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $s0 killed $s0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.s[1], v1.s[0]
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x float>, <2 x float> } @llvm.sincos.v2f32(<2 x float> %a) ; the CHECK prefix still expects scalar sincosf calls, the NO-LIBCALL prefix expects scalar cosf calls and no sinf
+ %result.1 = extractvalue { <2 x float>, <2 x float> } %result, 1 ; element 1 is the vector the NO-LIBCALL checks build from cosf results
+ ret <2 x float> %result.1 ; element 0 is never read in this function
+}
+
+define { double, double } @test_sincos_f64(double %a) { ; scalar double input, both elements of the sincos result are returned
+; CHECK-LABEL: test_sincos_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #24]
+; CHECK-NEXT: ldr d1, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: stp d9, d8, [sp, #-32]! // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 32
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: .cfi_offset b8, -24
+; NO-LIBCALL-NEXT: .cfi_offset b9, -32
+; NO-LIBCALL-NEXT: fmov d8, d0
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: fmov d9, d0
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: fmov d1, d0
+; NO-LIBCALL-NEXT: fmov d0, d9
+; NO-LIBCALL-NEXT: ldp d9, d8, [sp], #32 // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: ret
+ %result = call { double, double } @llvm.sincos.f64(double %a) ; the CHECK prefix expects one sincos call writing to two stack slots, the NO-LIBCALL prefix expects separate sin and cos calls
+ ret { double, double } %result ; return the whole pair so both elements are used
+}
+
+define double @test_sincos_f64_only_use_sin(double %a) { ; scalar double input, only element 0 of the sincos result is returned
+; CHECK-LABEL: test_sincos_f64_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #24]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b sin
+ %result = call { double, double } @llvm.sincos.f64(double %a) ; the CHECK prefix still expects a sincos call, the NO-LIBCALL prefix expects nothing but a branch to sin
+ %result.0 = extractvalue { double, double } %result, 0 ; element 0 is the value the NO-LIBCALL checks obtain from sin
+ ret double %result.0 ; element 1 is never read in this function
+}
+
+define double @test_sincos_f64_only_use_cos(double %a) { ; scalar double input, only element 1 of the sincos result is returned
+; CHECK-LABEL: test_sincos_f64_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #8
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_f64_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: b cos
+ %result = call { double, double } @llvm.sincos.f64(double %a) ; the CHECK prefix still expects a sincos call, the NO-LIBCALL prefix expects nothing but a branch to cos
+ %result.1 = extractvalue { double, double } %result, 1 ; element 1 is the value the NO-LIBCALL checks obtain from cos
+ ret double %result.1 ; element 0 is never read in this function
+}
+
+define { <2 x double>, <2 x double> } @test_sincos_v2f64(<2 x double> %a) { ; two-lane double vector input, both result vectors are returned
+; CHECK-LABEL: test_sincos_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #80
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: add x0, sp, #56
+; CHECK-NEXT: add x1, sp, #40
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #32
+; CHECK-NEXT: add x1, sp, #24
+; CHECK-NEXT: add x19, sp, #32
+; CHECK-NEXT: add x20, sp, #24
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #56]
+; CHECK-NEXT: ldr d1, [sp, #40]
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ld1 { v1.d }[1], [x20]
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #80
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #64
+; NO-LIBCALL-NEXT: str d8, [sp, #48] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: str x30, [sp, #56] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 64
+; NO-LIBCALL-NEXT: .cfi_offset w30, -8
+; NO-LIBCALL-NEXT: .cfi_offset b8, -16
+; NO-LIBCALL-NEXT: mov d8, v0.d[1]
+; NO-LIBCALL-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: fmov d0, d8
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: fmov d1, d0
+; NO-LIBCALL-NEXT: ldp q2, q0, [sp] // 32-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: ldr d8, [sp, #48] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v1.d[1], v2.d[0]
+; NO-LIBCALL-NEXT: add sp, sp, #64
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) ; the checks expect only scalar calls per lane, sincos for the CHECK prefix, sin and cos for the NO-LIBCALL prefix
+ ret { <2 x double>, <2 x double> } %result ; return both vectors so both elements are used
+}
+
+define <2 x double> @test_sincos_v2f64_only_use_sin(<2 x double> %a) { ; Returns only field 0 (the sin half) of the llvm.sincos.v2f64 result.
+; CHECK-LABEL: test_sincos_v2f64_only_use_sin:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #24
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #40]
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_sin:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov d0, v0.d[1]
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl sin
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) ; a single intrinsic call produces both result vectors
+ %result.0 = extractvalue { <2 x double>, <2 x double> } %result, 0 ; field 1 is never read: the CHECK run still emits two sincos calls and loads the slots passed in x0, the NO-LIBCALL run emits two sin calls and no cos
+ ret <2 x double> %result.0
+}
+
+define <2 x double> @test_sincos_v2f64_only_use_cos(<2 x double> %a) { ; Returns only field 1 (the cos half) of the llvm.sincos.v2f64 result.
+; CHECK-LABEL: test_sincos_v2f64_only_use_cos:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: stp x30, x19, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add x0, sp, #40
+; CHECK-NEXT: add x1, sp, #32
+; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: add x0, sp, #24
+; CHECK-NEXT: add x1, sp, #16
+; CHECK-NEXT: add x19, sp, #16
+; CHECK-NEXT: mov d0, v0.d[1]
+; CHECK-NEXT: bl sincos
+; CHECK-NEXT: ldr d0, [sp, #32]
+; CHECK-NEXT: ld1 { v0.d }[1], [x19]
+; CHECK-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+;
+; NO-LIBCALL-LABEL: test_sincos_v2f64_only_use_cos:
+; NO-LIBCALL: // %bb.0:
+; NO-LIBCALL-NEXT: sub sp, sp, #48
+; NO-LIBCALL-NEXT: str x30, [sp, #32] // 8-byte Folded Spill
+; NO-LIBCALL-NEXT: .cfi_def_cfa_offset 48
+; NO-LIBCALL-NEXT: .cfi_offset w30, -16
+; NO-LIBCALL-NEXT: str q0, [sp] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: mov d0, v0.d[1]
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
+; NO-LIBCALL-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 killed $q0
+; NO-LIBCALL-NEXT: bl cos
+; NO-LIBCALL-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; NO-LIBCALL-NEXT: // kill: def $d0 killed $d0 def $q0
+; NO-LIBCALL-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; NO-LIBCALL-NEXT: mov v0.d[1], v1.d[0]
+; NO-LIBCALL-NEXT: add sp, sp, #48
+; NO-LIBCALL-NEXT: ret
+ %result = call { <2 x double>, <2 x double> } @llvm.sincos.v2f64(<2 x double> %a) ; a single intrinsic call produces both result vectors
+ %result.1 = extractvalue { <2 x double>, <2 x double> } %result, 1 ; field 0 is never read: the CHECK run still emits two sincos calls and loads the slots passed in x1, the NO-LIBCALL run emits two cos calls and no sin
+ ret <2 x double> %result.1
+}
----------------
MacDue wrote:
I added a simple test that uses the `-debug=isel` output to check it: https://github.com/llvm/llvm-project/pull/109825/files#diff-0a696752af71ad6ceb144da9fac6f2ac9b1c13ea233d700484b044b9c07fc36d (marked with `REQUIRES: asserts`).
https://github.com/llvm/llvm-project/pull/109825
More information about the llvm-commits
mailing list