[llvm] eb00555 - AMDGPU: Add more tests for sincos recognition

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 1 15:20:55 PDT 2023


Author: Matt Arsenault
Date: 2023-08-01T18:20:50-04:00
New Revision: eb00555c16b70d7940e99a89dce72994f5790cc9

URL: https://github.com/llvm/llvm-project/commit/eb00555c16b70d7940e99a89dce72994f5790cc9
DIFF: https://github.com/llvm/llvm-project/commit/eb00555c16b70d7940e99a89dce72994f5790cc9.diff

LOG: AMDGPU: Add more tests for sincos recognition

These show both broken cases and cases which are handled too
conservatively.

Added: 
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll
    llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll
new file mode 100644
index 00000000000000..87e1dc63e6588b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+; sin, cos, and sincos are already defined in the module.
+; The definitions are placeholders: each body is an opaque inline asm call
+; (plus a store through the pointer argument for the sincos variants).
+
+define float @_Z3sinf(float %x) {
+; CHECK-LABEL: define float @_Z3sinf
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call float asm "
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %result = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  ret float %result
+}
+
+define float @_Z3cosf(float %x) {
+; CHECK-LABEL: define float @_Z3cosf
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call float asm "
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %result = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  ret float %result
+}
+
+define <2 x float> @_Z3sinDv2_f(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @_Z3sinDv2_f
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    ret <2 x float> [[RESULT]]
+;
+  %result = call <2 x float> asm "; $0 = sin($1)","=v,v"(<2 x float> %x)
+  ret <2 x float> %result
+}
+
+define <2 x float> @_Z3cosDv2_f(<2 x float> %x) {
+; CHECK-LABEL: define <2 x float> @_Z3cosDv2_f
+; CHECK-SAME: (<2 x float> [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    ret <2 x float> [[RESULT]]
+;
+  %result = call <2 x float> asm "; $0 = cos($1)","=v,v"(<2 x float> %x)
+  ret <2 x float> %result
+}
+
+define float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %ptr) {
+; CHECK-LABEL: define float @_Z6sincosfPU3AS5f
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
+; CHECK-NEXT:    [[RESULT0:%.*]] = call float asm "
+; CHECK-NEXT:    [[RESULT1:%.*]] = call float asm "
+; CHECK-NEXT:    store float [[RESULT1]], ptr addrspace(5) [[PTR]], align 4
+; CHECK-NEXT:    ret float [[RESULT0]]
+;
+  %result0 = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  %result1 = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  store float %result1, ptr addrspace(5) %ptr
+  ret float %result0
+}
+
+define <2 x float> @_Z6sincosDv2_fPU3AS5S_(<2 x float> %x, ptr addrspace(5) %ptr) {
+; CHECK-LABEL: define <2 x float> @_Z6sincosDv2_fPU3AS5S_
+; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
+; CHECK-NEXT:    [[RESULT0:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    [[RESULT1:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    store <2 x float> [[RESULT1]], ptr addrspace(5) [[PTR]], align 8
+; CHECK-NEXT:    ret <2 x float> [[RESULT0]]
+;
+  %result0 = call <2 x float> asm "; $0 = sin($1)","=v,v"(<2 x float> %x)
+  %result1 = call <2 x float> asm "; $0 = cos($1)","=v,v"(<2 x float> %x)
+  store <2 x float> %result1, ptr addrspace(5) %ptr
+  ret <2 x float> %result0
+}
+
+define float @_Z6sincosfPU3AS0f(float %x, ptr %ptr) {
+; CHECK-LABEL: define float @_Z6sincosfPU3AS0f
+; CHECK-SAME: (float [[X:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[RESULT0:%.*]] = call float asm "
+; CHECK-NEXT:    [[RESULT1:%.*]] = call float asm "
+; CHECK-NEXT:    store float [[RESULT1]], ptr [[PTR]], align 4
+; CHECK-NEXT:    ret float [[RESULT0]]
+;
+  %result0 = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  %result1 = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  store float %result1, ptr %ptr
+  ret float %result0
+}
+
+define <2 x float> @_Z6sincosDv2_fPU3AS0S_(<2 x float> %x, ptr %ptr) {
+; CHECK-LABEL: define <2 x float> @_Z6sincosDv2_fPU3AS0S_
+; CHECK-SAME: (<2 x float> [[X:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[RESULT0:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    [[RESULT1:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    store <2 x float> [[RESULT1]], ptr [[PTR]], align 8
+; CHECK-NEXT:    ret <2 x float> [[RESULT0]]
+;
+  %result0 = call <2 x float> asm "; $0 = sin($1)","=v,v"(<2 x float> %x)
+  %result1 = call <2 x float> asm "; $0 = cos($1)","=v,v"(<2 x float> %x)
+  store <2 x float> %result1, ptr %ptr
+  ret <2 x float> %result0
+}
+
+; Users of the definitions above. The CHECK lines record the current output,
+; in which every sin/cos call pair is left unchanged.
+
+define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp
+; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float 42.0)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float 42.0)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> %x)
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> %x)
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+; Both calls carry the nobuiltin attribute at the call site.
+define void @sincos_f32_nobuiltin_callsite(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_nobuiltin_callsite
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]) #[[ATTR0:[0-9]+]]
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x) nobuiltin
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x) nobuiltin
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll
new file mode 100644
index 00000000000000..2e4703b5f8cf4c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.nobuiltin.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s
+
+; The private (addrspace(5)) variant of sincos is defined but marked nobuiltin,
+; so the intent is to introduce a call to the generic (addrspace(0)) one.
+; NOTE: The checks below record the current output, in which the sin and cos
+; calls in @sincos_f32 are left unchanged and no sincos call is introduced.
+
+define float @_Z3sinf(float %x) {
+; CHECK-LABEL: define float @_Z3sinf
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call float asm "
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %result = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  ret float %result
+}
+
+define float @_Z3cosf(float %x) {
+; CHECK-LABEL: define float @_Z3cosf
+; CHECK-SAME: (float [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call float asm "
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %result = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  ret float %result
+}
+
+define float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %ptr) nobuiltin {
+; CHECK-LABEL: define float @_Z6sincosfPU3AS5f
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(5) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[RESULT0:%.*]] = call float asm "
+; CHECK-NEXT:    [[RESULT1:%.*]] = call float asm "
+; CHECK-NEXT:    store float [[RESULT1]], ptr addrspace(5) [[PTR]], align 4
+; CHECK-NEXT:    ret float [[RESULT0]]
+;
+  %result0 = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  %result1 = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  store float %result1, ptr addrspace(5) %ptr
+  ret float %result0
+}
+
+define float @_Z6sincosfPU3AS0f(float %x, ptr %ptr) {
+; CHECK-LABEL: define float @_Z6sincosfPU3AS0f
+; CHECK-SAME: (float [[X:%.*]], ptr [[PTR:%.*]]) {
+; CHECK-NEXT:    [[RESULT0:%.*]] = call float asm "
+; CHECK-NEXT:    [[RESULT1:%.*]] = call float asm "
+; CHECK-NEXT:    store float [[RESULT1]], ptr [[PTR]], align 4
+; CHECK-NEXT:    ret float [[RESULT0]]
+;
+  %result0 = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  %result1 = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  store float %result1, ptr %ptr
+  ret float %result0
+}
+
+define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll
new file mode 100644
index 00000000000000..1f952f8a0c44cb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.defined.sin.cos.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+; sin and cos are already defined in the module but sincos isn't.
+; The definitions are placeholders whose bodies are opaque inline asm calls.
+
+define float @_Z3sinf(float noundef %x) {
+; CHECK-LABEL: define float @_Z3sinf
+; CHECK-SAME: (float noundef [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call float asm "
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %result = call float asm "; $0 = sin($1)","=v,v"(float %x)
+  ret float %result
+}
+
+define float @_Z3cosf(float noundef %x) {
+; CHECK-LABEL: define float @_Z3cosf
+; CHECK-SAME: (float noundef [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call float asm "
+; CHECK-NEXT:    ret float [[RESULT]]
+;
+  %result = call float asm "; $0 = cos($1)","=v,v"(float %x)
+  ret float %result
+}
+
+define <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x) {
+; CHECK-LABEL: define <2 x float> @_Z3sinDv2_f
+; CHECK-SAME: (<2 x float> noundef [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    ret <2 x float> [[RESULT]]
+;
+  %result = call <2 x float> asm "; $0 = sin($1)","=v,v"(<2 x float> %x)
+  ret <2 x float> %result
+}
+
+define <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x) {
+; CHECK-LABEL: define <2 x float> @_Z3cosDv2_f
+; CHECK-SAME: (<2 x float> noundef [[X:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = call <2 x float> asm "
+; CHECK-NEXT:    ret <2 x float> [[RESULT]]
+;
+  %result = call <2 x float> asm "; $0 = cos($1)","=v,v"(<2 x float> %x)
+  ret <2 x float> %result
+}
+
+; Users of sin and cos. The CHECK lines record the current output, in which
+; every sin/cos call pair is left unchanged.
+
+define void @sincos_f32(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float noundef %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp
+; CHECK-SAME: (ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float 42.0)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float 42.0)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_v2f32(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f32
+; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x)
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x)
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
new file mode 100644
index 00000000000000..82b50ce9da0b6b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.ll
@@ -0,0 +1,1292 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall=1 -amdgpu-prelink < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z3sinf(float) #0
+declare float @_Z3cosf(float) #0
+declare <2 x float> @_Z3sinDv2_f(<2 x float>) #0
+declare <2 x float> @_Z3cosDv2_f(<2 x float>) #0
+declare <3 x float> @_Z3sinDv3_f(<3 x float>) #0
+declare <3 x float> @_Z3cosDv3_f(<3 x float>) #0
+declare <4 x float> @_Z3sinDv4_f(<4 x float>) #0
+declare <4 x float> @_Z3cosDv4_f(<4 x float>) #0
+declare <8 x float> @_Z3sinDv8_f(<8 x float>) #0
+declare <8 x float> @_Z3cosDv8_f(<8 x float>) #0
+declare <16 x float> @_Z3sinDv16_f(<16 x float>) #0
+declare <16 x float> @_Z3cosDv16_f(<16 x float>) #0
+
+
+declare half @_Z3sinDh(half) #0
+declare half @_Z3cosDh(half) #0
+declare <2 x half> @_Z3sinDv2_Dh(<2 x half>) #0
+declare <2 x half> @_Z3cosDv2_Dh(<2 x half>) #0
+declare <3 x half> @_Z3sinDv3_Dh(<3 x half>) #0
+declare <3 x half> @_Z3cosDv3_Dh(<3 x half>) #0
+declare <4 x half> @_Z3sinDv4_Dh(<4 x half>) #0
+declare <4 x half> @_Z3cosDv4_Dh(<4 x half>) #0
+declare <8 x half> @_Z3sinDv8_Dh(<8 x half>) #0
+declare <8 x half> @_Z3cosDv8_Dh(<8 x half>) #0
+declare <16 x half> @_Z3sinDv16_Dh(<16 x half>) #0
+declare <16 x half> @_Z3cosDv16_Dh(<16 x half>) #0
+
+
+declare double @_Z3sind(double) #0
+declare double @_Z3cosd(double) #0
+declare <2 x double> @_Z3sinDv2_d(<2 x double>) #0
+declare <2 x double> @_Z3cosDv2_d(<2 x double>) #0
+declare <3 x double> @_Z3sinDv3_d(<3 x double>) #0
+declare <3 x double> @_Z3cosDv3_d(<3 x double>) #0
+declare <4 x double> @_Z3sinDv4_d(<4 x double>) #0
+declare <4 x double> @_Z3cosDv4_d(<4 x double>) #0
+declare <8 x double> @_Z3sinDv8_d(<8 x double>) #0
+declare <8 x double> @_Z3cosDv8_d(<8 x double>) #0
+declare <16 x double> @_Z3sinDv16_d(<16 x double>) #0
+declare <16 x double> @_Z3cosDv16_d(<16 x double>) #0
+
+declare float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) writeonly %ptr) #1
+declare float @_Z6sincosfPU3AS0f(float %x, ptr writeonly %ptr) #1
+
+define void @sincos_f16_nocontract(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f16_nocontract
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call half @_Z3sinDh(half [[X]])
+; CHECK-NEXT:    store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call half @_Z3cosDh(half [[X]])
+; CHECK-NEXT:    store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call half @_Z3sinDh(half %x)
+  store half %call, ptr addrspace(1) %sin_out, align 2
+  %call1 = tail call half @_Z3cosDh(half %x)
+  store half %call1, ptr addrspace(1) %cos_out, align 2
+  ret void
+}
+
+
+define void @sincos_v2f16_nocontract(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f16_nocontract
+; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call <2 x half> @_Z3cosDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    store <2 x half> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call <2 x half> @_Z3sinDv2_Dh(<2 x half> %x)
+  store <2 x half> %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call <2 x half> @_Z3cosDv2_Dh(<2 x half> %x)
+  store <2 x half> %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+
+define void @sincos_f16(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f16
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]])
+; CHECK-NEXT:    store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]])
+; CHECK-NEXT:    store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract half @_Z3sinDh(half %x)
+  store half %call, ptr addrspace(1) %sin_out, align 2
+  %call1 = tail call contract half @_Z3cosDh(half %x)
+  store half %call1, ptr addrspace(1) %cos_out, align 2
+  ret void
+}
+
+define void @sincos_f16_order1(half %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f16_order1
+; CHECK-SAME: (half [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract half @_Z3cosDh(half [[X]])
+; CHECK-NEXT:    store half [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 2
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract half @_Z3sinDh(half [[X]])
+; CHECK-NEXT:    store half [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call1 = tail call contract half @_Z3cosDh(half %x)
+  store half %call1, ptr addrspace(1) %cos_out, align 2
+  %call = tail call contract half @_Z3sinDh(half %x)
+  store half %call, ptr addrspace(1) %sin_out, align 2
+  ret void
+}
+
+define void @sincos_v2f16(<2 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f16
+; CHECK-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x half> @_Z3sinDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    store <2 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x half> @_Z3cosDv2_Dh(<2 x half> [[X]])
+; CHECK-NEXT:    store <2 x half> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract <2 x half> @_Z3sinDv2_Dh(<2 x half> %x)
+  store <2 x half> %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract <2 x half> @_Z3cosDv2_Dh(<2 x half> %x)
+  store <2 x half> %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_v3f16(<3 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v3f16
+; CHECK-SAME: (<3 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <3 x half> @_Z3sinDv3_Dh(<3 x half> [[X]])
+; CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <3 x half> [[CALL]], <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    store <4 x half> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL5:%.*]] = tail call contract <3 x half> @_Z3cosDv3_Dh(<3 x half> [[X]])
+; CHECK-NEXT:    [[EXTRACTVEC6:%.*]] = shufflevector <3 x half> [[CALL5]], <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    store <4 x half> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract <3 x half> @_Z3sinDv3_Dh(<3 x half> %x)
+  %extractVec2 = shufflevector <3 x half> %call, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x half> %extractVec2, ptr addrspace(1) %sin_out, align 8
+  %call5 = tail call contract <3 x half> @_Z3cosDv3_Dh(<3 x half> %x)
+  %extractVec6 = shufflevector <3 x half> %call5, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x half> %extractVec6, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+
+define void @sincos_v4f16(<4 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v4f16
+; CHECK-SAME: (<4 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <4 x half> @_Z3sinDv4_Dh(<4 x half> [[X]])
+; CHECK-NEXT:    store <4 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <4 x half> @_Z3cosDv4_Dh(<4 x half> [[X]])
+; CHECK-NEXT:    store <4 x half> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract <4 x half> @_Z3sinDv4_Dh(<4 x half> %x)
+  store <4 x half> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <4 x half> @_Z3cosDv4_Dh(<4 x half> %x)
+  store <4 x half> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+define void @sincos_v8f16(<8 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v8f16
+; CHECK-SAME: (<8 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <8 x half> @_Z3sinDv8_Dh(<8 x half> [[X]])
+; CHECK-NEXT:    store <8 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <8 x half> @_Z3cosDv8_Dh(<8 x half> [[X]])
+; CHECK-NEXT:    store <8 x half> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract <8 x half> @_Z3sinDv8_Dh(<8 x half> %x)
+  store <8 x half> %call, ptr addrspace(1) %sin_out, align 16
+  %call1 = tail call contract <8 x half> @_Z3cosDv8_Dh(<8 x half> %x)
+  store <8 x half> %call1, ptr addrspace(1) %cos_out, align 16
+  ret void
+}
+
+
+define void @sincos_v16f16(<16 x half> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v16f16
+; CHECK-SAME: (<16 x half> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <16 x half> @_Z3sinDv16_Dh(<16 x half> [[X]])
+; CHECK-NEXT:    store <16 x half> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <16 x half> @_Z3cosDv16_Dh(<16 x half> [[X]])
+; CHECK-NEXT:    store <16 x half> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:  ; <16 x half> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <16 x half> @_Z3sinDv16_Dh(<16 x half> %x)
+  store <16 x half> %call, ptr addrspace(1) %sin_out, align 32
+  %call1 = tail call contract <16 x half> @_Z3cosDv16_Dh(<16 x half> %x)
+  store <16 x half> %call1, ptr addrspace(1) %cos_out, align 32
+  ret void
+}
+
+
+define void @sincos_f32_nocontract(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_nocontract
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; float case where neither call carries the 'contract' flag; the CHECK lines expect both calls to stay separate
+  %call = tail call float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+
+define void @sincos_v2f32_nocontract(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f32_nocontract
+; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x float> @_Z3sinDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call <2 x float> @_Z3cosDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:  ; <2 x float> case where neither call carries the 'contract' flag; the CHECK lines expect both calls to stay separate
+  %call = tail call <2 x float> @_Z3sinDv2_f(<2 x float> %x)
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call <2 x float> @_Z3cosDv2_f(<2 x float> %x)
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+
+define void @sincos_f32(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; float case: 'contract' sin then cos of the same function argument %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_order1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_order1
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; float case with cos called before sin on the same %x; the CHECK lines expect both calls to stay separate, in this order
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  ret void
+}
+
+define void @sincos_v2f32(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f32
+; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:  ; <2 x float> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> %x)
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> %x)
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+define void @sincos_v3f32(<3 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v3f32
+; CHECK-SAME: (<3 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <3 x float> @_Z3sinDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <3 x float> [[CALL]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    store <4 x float> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 16
+; CHECK-NEXT:    [[CALL5:%.*]] = tail call contract <3 x float> @_Z3cosDv3_f(<3 x float> [[X]])
+; CHECK-NEXT:    [[EXTRACTVEC6:%.*]] = shufflevector <3 x float> [[CALL5]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    store <4 x float> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:  ; <3 x float> case: each result is widened to <4 x float> by a shufflevector before the store; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <3 x float> @_Z3sinDv3_f(<3 x float> %x)
+  %extractVec2 = shufflevector <3 x float> %call, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x float> %extractVec2, ptr addrspace(1) %sin_out, align 16
+  %call5 = tail call contract <3 x float> @_Z3cosDv3_f(<3 x float> %x)
+  %extractVec6 = shufflevector <3 x float> %call5, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x float> %extractVec6, ptr addrspace(1) %cos_out, align 16
+  ret void
+}
+
+define void @sincos_v4f32(<4 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v4f32
+; CHECK-SAME: (<4 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <4 x float> @_Z3sinDv4_f(<4 x float> [[X]])
+; CHECK-NEXT:    store <4 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <4 x float> @_Z3cosDv4_f(<4 x float> [[X]])
+; CHECK-NEXT:    store <4 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:  ; <4 x float> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <4 x float> @_Z3sinDv4_f(<4 x float> %x)
+  store <4 x float> %call, ptr addrspace(1) %sin_out, align 16
+  %call1 = tail call contract <4 x float> @_Z3cosDv4_f(<4 x float> %x)
+  store <4 x float> %call1, ptr addrspace(1) %cos_out, align 16
+  ret void
+}
+
+define void @sincos_v8f32(<8 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v8f32
+; CHECK-SAME: (<8 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <8 x float> @_Z3sinDv8_f(<8 x float> [[X]])
+; CHECK-NEXT:    store <8 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <8 x float> @_Z3cosDv8_f(<8 x float> [[X]])
+; CHECK-NEXT:    store <8 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:  ; <8 x float> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <8 x float> @_Z3sinDv8_f(<8 x float> %x)
+  store <8 x float> %call, ptr addrspace(1) %sin_out, align 32
+  %call1 = tail call contract <8 x float> @_Z3cosDv8_f(<8 x float> %x)
+  store <8 x float> %call1, ptr addrspace(1) %cos_out, align 32
+  ret void
+}
+
+define void @sincos_v16f32(<16 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v16f32
+; CHECK-SAME: (<16 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <16 x float> @_Z3sinDv16_f(<16 x float> [[X]])
+; CHECK-NEXT:    store <16 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 64
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <16 x float> @_Z3cosDv16_f(<16 x float> [[X]])
+; CHECK-NEXT:    store <16 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 64
+; CHECK-NEXT:    ret void
+;
+entry:  ; <16 x float> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <16 x float> @_Z3sinDv16_f(<16 x float> %x)
+  store <16 x float> %call, ptr addrspace(1) %sin_out, align 64
+  %call1 = tail call contract <16 x float> @_Z3cosDv16_f(<16 x float> %x)
+  store <16 x float> %call1, ptr addrspace(1) %cos_out, align 64
+  ret void
+}
+
+define void @sincos_f64_nocontract(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f64_nocontract
+; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call double @_Z3sind(double [[X]])
+; CHECK-NEXT:    store double [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call double @_Z3cosd(double [[X]])
+; CHECK-NEXT:    store double [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:  ; double case where neither call carries the 'contract' flag; the CHECK lines expect both calls to stay separate
+  %call = tail call double @_Z3sind(double %x)
+  store double %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call double @_Z3cosd(double %x)
+  store double %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+
+define void @sincos_v2f64_nocontract(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f64_nocontract
+; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call <2 x double> @_Z3sinDv2_d(<2 x double> [[X]])
+; CHECK-NEXT:    store <2 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call <2 x double> @_Z3cosDv2_d(<2 x double> [[X]])
+; CHECK-NEXT:    store <2 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:  ; <2 x double> case where neither call carries the 'contract' flag; the CHECK lines expect both calls to stay separate
+  %call = tail call <2 x double> @_Z3sinDv2_d(<2 x double> %x)
+  store <2 x double> %call, ptr addrspace(1) %sin_out, align 16
+  %call1 = tail call <2 x double> @_Z3cosDv2_d(<2 x double> %x)
+  store <2 x double> %call1, ptr addrspace(1) %cos_out, align 16
+  ret void
+}
+
+define void @sincos_f64(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f64
+; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract double @_Z3sind(double [[X]])
+; CHECK-NEXT:    store double [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract double @_Z3cosd(double [[X]])
+; CHECK-NEXT:    store double [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:  ; double case: 'contract' sin then cos of the same function argument %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract double @_Z3sind(double %x)
+  store double %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract double @_Z3cosd(double %x)
+  store double %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+define void @sincos_f64_order1(double %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f64_order1
+; CHECK-SAME: (double [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract double @_Z3cosd(double [[X]])
+; CHECK-NEXT:    store double [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract double @_Z3sind(double [[X]])
+; CHECK-NEXT:    store double [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:  ; double case with cos called before sin on the same %x; the CHECK lines expect both calls to stay separate, in this order
+  %call1 = tail call contract double @_Z3cosd(double %x)
+  store double %call1, ptr addrspace(1) %cos_out, align 8
+  %call = tail call contract double @_Z3sind(double %x)
+  store double %call, ptr addrspace(1) %sin_out, align 8
+  ret void
+}
+
+define void @sincos_v2f64(<2 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f64
+; CHECK-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x double> @_Z3sinDv2_d(<2 x double> [[X]])
+; CHECK-NEXT:    store <2 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 16
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x double> @_Z3cosDv2_d(<2 x double> [[X]])
+; CHECK-NEXT:    store <2 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 16
+; CHECK-NEXT:    ret void
+;
+entry:  ; <2 x double> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <2 x double> @_Z3sinDv2_d(<2 x double> %x)
+  store <2 x double> %call, ptr addrspace(1) %sin_out, align 16
+  %call1 = tail call contract <2 x double> @_Z3cosDv2_d(<2 x double> %x)
+  store <2 x double> %call1, ptr addrspace(1) %cos_out, align 16
+  ret void
+}
+
+define void @sincos_v3f64(<3 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v3f64
+; CHECK-SAME: (<3 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <3 x double> @_Z3sinDv3_d(<3 x double> [[X]])
+; CHECK-NEXT:    [[EXTRACTVEC2:%.*]] = shufflevector <3 x double> [[CALL]], <3 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    store <4 x double> [[EXTRACTVEC2]], ptr addrspace(1) [[SIN_OUT]], align 32
+; CHECK-NEXT:    [[CALL5:%.*]] = tail call contract <3 x double> @_Z3cosDv3_d(<3 x double> [[X]])
+; CHECK-NEXT:    [[EXTRACTVEC6:%.*]] = shufflevector <3 x double> [[CALL5]], <3 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+; CHECK-NEXT:    store <4 x double> [[EXTRACTVEC6]], ptr addrspace(1) [[COS_OUT]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:  ; <3 x double> case: each result is widened to <4 x double> by a shufflevector before the store; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <3 x double> @_Z3sinDv3_d(<3 x double> %x)
+  %extractVec2 = shufflevector <3 x double> %call, <3 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x double> %extractVec2, ptr addrspace(1) %sin_out, align 32
+  %call5 = tail call contract <3 x double> @_Z3cosDv3_d(<3 x double> %x)
+  %extractVec6 = shufflevector <3 x double> %call5, <3 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
+  store <4 x double> %extractVec6, ptr addrspace(1) %cos_out, align 32
+  ret void
+}
+
+define void @sincos_v4f64(<4 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v4f64
+; CHECK-SAME: (<4 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <4 x double> @_Z3sinDv4_d(<4 x double> [[X]])
+; CHECK-NEXT:    store <4 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 32
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <4 x double> @_Z3cosDv4_d(<4 x double> [[X]])
+; CHECK-NEXT:    store <4 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 32
+; CHECK-NEXT:    ret void
+;
+entry:  ; <4 x double> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <4 x double> @_Z3sinDv4_d(<4 x double> %x)
+  store <4 x double> %call, ptr addrspace(1) %sin_out, align 32
+  %call1 = tail call contract <4 x double> @_Z3cosDv4_d(<4 x double> %x)
+  store <4 x double> %call1, ptr addrspace(1) %cos_out, align 32
+  ret void
+}
+
+define void @sincos_v8f64(<8 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v8f64
+; CHECK-SAME: (<8 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <8 x double> @_Z3sinDv8_d(<8 x double> [[X]])
+; CHECK-NEXT:    store <8 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 64
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <8 x double> @_Z3cosDv8_d(<8 x double> [[X]])
+; CHECK-NEXT:    store <8 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 64
+; CHECK-NEXT:    ret void
+;
+entry:  ; <8 x double> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <8 x double> @_Z3sinDv8_d(<8 x double> %x)
+  store <8 x double> %call, ptr addrspace(1) %sin_out, align 64
+  %call1 = tail call contract <8 x double> @_Z3cosDv8_d(<8 x double> %x)
+  store <8 x double> %call1, ptr addrspace(1) %cos_out, align 64
+  ret void
+}
+
+define void @sincos_v16f64(<16 x double> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v16f64
+; CHECK-SAME: (<16 x double> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <16 x double> @_Z3sinDv16_d(<16 x double> [[X]])
+; CHECK-NEXT:    store <16 x double> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 128
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <16 x double> @_Z3cosDv16_d(<16 x double> [[X]])
+; CHECK-NEXT:    store <16 x double> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 128
+; CHECK-NEXT:    ret void
+;
+entry:  ; <16 x double> case: 'contract' sin then cos of the same %x; the CHECK lines expect both calls to stay separate
+  %call = tail call contract <16 x double> @_Z3sinDv16_d(<16 x double> %x)
+  store <16 x double> %call, ptr addrspace(1) %sin_out, align 128
+  %call1 = tail call contract <16 x double> @_Z3cosDv16_d(<16 x double> %x)
+  store <16 x double> %call1, ptr addrspace(1) %cos_out, align 128
+  ret void
+}
+
+
+
+define void @sincos_f32_different_blocks(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_different_blocks
+; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       common.ret:
+; CHECK-NEXT:    ret void
+; CHECK:       bb0:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    br label [[COMMON_RET]]
+;
+entry:  ; sin is only in bb0 and cos only in bb1, the two targets of this branch, so one execution runs just one of them; the CHECK lines expect each call to stay in its block
+  br i1 %cond, label %bb0, label %bb1
+
+bb0:
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  ret void
+
+bb1:
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; The sin and cos are in different blocks but always execute
+define void @sincos_f32_different_blocks_dominating_always_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) {
+; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_always_execute
+; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[OTHER:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    br i1 [[COND]], label [[BB0:%.*]], label [[BB1:%.*]]
+; CHECK:       bb0:
+; CHECK-NEXT:    store i32 0, ptr addrspace(1) [[OTHER]], align 4
+; CHECK-NEXT:    br label [[BB1]]
+; CHECK:       bb1:
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; sin is in entry and cos in bb1, which every path reaches (directly or through bb0); the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  br i1 %cond, label %bb0, label %bb1
+
+bb0:
+  store i32 0, ptr addrspace(1) %other
+  br label %bb1
+
+bb1:
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; sin dominates cos but cos doesn't always execute.
+define void @sincos_f32_different_blocks_dominating_conditional_execute(i1 %cond, float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr addrspace(1) %other) {
+; CHECK-LABEL: define void @sincos_f32_different_blocks_dominating_conditional_execute
+; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr addrspace(1) nocapture readnone [[OTHER:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    [[SPEC_SELECT:%.*]] = select i1 [[COND]], float [[CALL1]], float 0.000000e+00
+; CHECK-NEXT:    store float [[SPEC_SELECT]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; cos is only called in bb0 and reaches the store through a phi with 0.0; the CHECK lines show an unconditional cos call feeding a select, with sin and cos still separate
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  br i1 %cond, label %bb0, label %bb1
+
+bb0:
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  br label %bb1
+
+bb1:
+  %phi = phi float [%call1, %bb0], [0.0, %entry]
+  store float %phi, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define float @select_sin_or_cos_f32(i1 %cond, float %x) {
+; CHECK-LABEL: define float @select_sin_or_cos_f32
+; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SIN:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    [[COS:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], float [[SIN]], float [[COS]]
+; CHECK-NEXT:    ret float [[SELECT]]
+;
+entry:  ; sin and cos of the same %x feed a select instead of stores, so only one result is returned; the CHECK lines expect both calls to stay separate
+  %sin = tail call contract float @_Z3sinf(float %x)
+  %cos = tail call contract float @_Z3cosf(float %x)
+  %select = select i1 %cond, float %sin, float %cos
+  ret float %select
+}
+
+declare void @func(ptr addrspace(1))  ; external function: called by @sincos_f32_value_is_instr, and its address is used as a constant-expression operand in the *_constexpr tests
+
+define void @sincos_f32_value_is_instr(ptr addrspace(1) %value.ptr, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_instr
+; CHECK-SAME: (ptr addrspace(1) [[VALUE_PTR:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SINCOS_CALL:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    tail call void @func(ptr addrspace(1) [[VALUE_PTR]])
+; CHECK-NEXT:    [[X:%.*]] = load float, ptr addrspace(1) [[VALUE_PTR]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_CALL]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
+; CHECK-NEXT:    store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_CALL]], align 4
+; CHECK-NEXT:    store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; %x is an instruction (a load after the @func call), not an argument; the CHECK lines expect one @_Z6sincosfPU3AS0f call, with cos read back from an addrspace(5) alloca
+  call void @func(ptr addrspace(1) %value.ptr)
+  %x = load float, ptr addrspace(1) %value.ptr
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_same_constexpr(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_same_constexpr
+; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float bitcast (i32 ptrtoint (ptr @func to i32) to float))
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float bitcast (i32 ptrtoint (ptr @func to i32) to float))
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; both calls take the same constant expression (the address of @func reinterpreted as a float); the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float bitcast (i32 ptrtoint (ptr @func to i32) to float))
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float bitcast (i32 ptrtoint (ptr @func to i32) to float))
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_different_constexpr(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_different_constexpr
+; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float bitcast (i32 ptrtoint (ptr @func to i32) to float))
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float bitcast (i32 ptrtoint (ptr @sincos_f32_value_is_different_constexpr to i32) to float))
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; the two calls take different constant expressions (address of @func vs. this function's own address), so they do not share an argument; the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float bitcast (i32 ptrtoint (ptr @func to i32) to float))
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float bitcast (i32 ptrtoint (ptr @sincos_f32_value_is_different_constexpr to i32) to float))
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp
+; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; both calls take the same floating-point literal (42.0); the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float 42.0)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float 42.0)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_different_constantfp(ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_different_constantfp
+; CHECK-SAME: (ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.250000e+01)
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; the calls take different floating-point literals (42.0 for sin, 42.5 for cos), so they do not share an argument; the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float 42.0)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float 42.5)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_different_args(float %x, float %y, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_different_args
+; CHECK-SAME: (float [[X:%.*]], float [[Y:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[Y]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; sin takes %x and cos takes %y, so the calls do not share an argument; the CHECK lines expect both calls to stay separate
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %y)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_flag_intersect0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_flag_intersect0
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call ninf contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:  ; the fast-math flags differ: sin has 'nnan contract', cos has 'ninf contract' (only 'contract' in common); the CHECK lines expect both calls to stay separate with their flags
+  %call = tail call nnan contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call ninf contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; Same shape as the flag_intersect0 test, but the flag sets overlap more:
+; sin has `nnan nsz contract`, cos has `nnan ninf contract` (common: `nnan`
+; and `contract`). The expected output keeps both calls unchanged.
+define void @sincos_f32_flag_intersect1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_flag_intersect1
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan nsz contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call nnan ninf contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call nnan nsz contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call nnan ninf contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; <2 x float> variant of the overlapping-flags case. The input spells the
+; flags as `contract ninf nnan` / `contract nsz nnan`; the CHECK lines match
+; them in the printed order (`nnan ninf contract` / `nnan nsz contract`).
+; The expected output keeps both vector calls unchanged.
+define void @sincos_v2f32_flag_intersect1(<2 x float> %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f32_flag_intersect1
+; CHECK-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call nnan ninf contract <2 x float> @_Z3sinDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call nnan nsz contract <2 x float> @_Z3cosDv2_f(<2 x float> [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract ninf nnan <2 x float> @_Z3sinDv2_f(<2 x float> %x)
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract nsz nnan <2 x float> @_Z3cosDv2_f(<2 x float> %x)
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+; External function taking two addrspace(5) pointers;
+; @sincos_f32_alloca_insertpt passes its two allocas to it.
+declare void @use_stack_ptrs(ptr addrspace(5), ptr addrspace(5))
+
+; The entry block already starts with two allocas before the sin/cos pair.
+; Expected output: the pair becomes a single _Z6sincosfPU3AS0f call. A new
+; float alloca is checked as the first instruction, ahead of the existing
+; allocas; it is addrspacecast to a generic pointer for the call. The value
+; stored where %sin was stored is the call's return value, and the value
+; stored where %cos was stored is loaded back from the new alloca.
+define void @sincos_f32_alloca_insertpt(float %x) {
+; CHECK-LABEL: define void @sincos_f32_alloca_insertpt
+; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SINCOS_SIN:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[ALLOCA1:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN]], align 4
+; CHECK-NEXT:    store float [[TMP1]], ptr addrspace(5) [[ALLOCA0]], align 4
+; CHECK-NEXT:    store float [[TMP2]], ptr addrspace(5) [[ALLOCA1]], align 4
+; CHECK-NEXT:    call void @use_stack_ptrs(ptr addrspace(5) [[ALLOCA0]], ptr addrspace(5) [[ALLOCA1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %alloca0 = alloca i32, addrspace(5)
+  %alloca1 = alloca i32, addrspace(5)
+  %sin = tail call contract float @_Z3sinf(float %x)
+  %cos = tail call contract float @_Z3cosf(float %x)
+  store float %sin, ptr addrspace(5) %alloca0
+  store float %cos, ptr addrspace(5) %alloca1
+  call void @use_stack_ptrs(ptr addrspace(5) %alloca0, ptr addrspace(5) %alloca1)
+  ret void
+}
+
+; Only the sin result is used (returned); the cos result %dead has no users.
+; Expected output: a sincos call is still formed and its return value is
+; returned; the cos slot is never loaded.
+; NOTE(review): %alloca0/%alloca1 are unused in the input and absent from the
+; expected output -- possibly left over from the alloca_insertpt test; confirm.
+define float @sincos_f32_unused_result_cos(float %x) {
+; CHECK-LABEL: define float @sincos_f32_unused_result_cos
+; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SINCOS_SIN:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
+; CHECK-NEXT:    ret float [[TMP1]]
+;
+entry:
+  %alloca0 = alloca i32, addrspace(5)
+  %alloca1 = alloca i32, addrspace(5)
+  %sin = tail call contract float @_Z3sinf(float %x)
+  %dead = tail call contract float @_Z3cosf(float %x)
+  ret float %sin
+}
+
+; Mirror of the unused-cos case: only the cos result is used (returned) and
+; the sin result %dead has no users. Expected output: a sincos call is formed
+; and the returned value is the one loaded back from the new alloca slot.
+; NOTE(review): %alloca0/%alloca1 are unused in the input and absent from the
+; expected output -- possibly left over from the alloca_insertpt test; confirm.
+define float @sincos_f32_unused_result_sin(float %x) {
+; CHECK-LABEL: define float @sincos_f32_unused_result_sin
+; CHECK-SAME: (float [[X:%.*]]) local_unnamed_addr #[[ATTR3]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SINCOS_DEAD:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_DEAD]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_DEAD]], align 4
+; CHECK-NEXT:    ret float [[TMP2]]
+;
+entry:
+  %alloca0 = alloca i32, addrspace(5)
+  %alloca1 = alloca i32, addrspace(5)
+  %dead = tail call contract float @_Z3sinf(float %x)
+  %cos = tail call contract float @_Z3cosf(float %x)
+  ret float %cos
+}
+
+; Three sin calls and two cos calls on the same %x, each followed by a
+; volatile store. Expected output: two separate sincos calls, each with its
+; own alloca, replace the first two sin calls. The two cos stores are fed by
+; loads from those allocas (second slot first, then the first), and the third
+; sin call is left as a plain _Z3sinf call.
+define void @sincos_f32_repeated_uses(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_repeated_uses
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) [[SIN_OUT:%.*]], ptr addrspace(1) [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SINCOS_SIN0:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[__SINCOS_SIN1:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN0]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
+; CHECK-NEXT:    store volatile float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN1]] to ptr
+; CHECK-NEXT:    [[TMP3:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP2]])
+; CHECK-NEXT:    store volatile float [[TMP3]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN1]], align 4
+; CHECK-NEXT:    store volatile float [[TMP4]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN0]], align 4
+; CHECK-NEXT:    store volatile float [[TMP5]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[SIN2:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store volatile float [[SIN2]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %sin0 = tail call contract float @_Z3sinf(float %x)
+  store volatile float %sin0, ptr addrspace(1) %sin_out, align 4
+
+  %sin1 = tail call contract float @_Z3sinf(float %x)
+  store volatile float %sin1, ptr addrspace(1) %sin_out, align 4
+
+  %cos0 = tail call contract float @_Z3cosf(float %x)
+  store volatile float %cos0, ptr addrspace(1) %cos_out, align 4
+
+  %cos1 = tail call contract float @_Z3cosf(float %x)
+  store volatile float %cos1, ptr addrspace(1) %cos_out, align 4
+
+  %sin2 = tail call contract float @_Z3sinf(float %x)
+  store volatile float %sin2, ptr addrspace(1) %sin_out, align 4
+
+  ret void
+}
+
+; %x is passed to a direct sin call and to an indirect call through
+; %func.ptr (there is no direct cos call). The expected output keeps both
+; calls unchanged.
+define void @sin_f32_indirect_call_user(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr %func.ptr) {
+; CHECK-LABEL: define void @sin_f32_indirect_call_user
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float [[FUNC_PTR]](float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x)
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float %func.ptr(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; %x is passed to a direct cos call and to an indirect call through
+; %func.ptr (there is no direct sin call). The expected output keeps both
+; calls unchanged.
+define void @cos_f32_indirect_call_user(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out, ptr %func.ptr) {
+; CHECK-LABEL: define void @cos_f32_indirect_call_user
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]], ptr nocapture readonly [[FUNC_PTR:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float [[FUNC_PTR]](float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3cosf(float %x)
+  store float %call, ptr addrspace(1) %cos_out, align 4
+  %call1 = tail call contract float %func.ptr(float %x)
+  store float %call1, ptr addrspace(1) %sin_out, align 4
+  ret void
+}
+
+; Both the sin and the cos call carry the same `!fpmath !5` metadata.
+; The expected output keeps the two calls (and their metadata) unchanged;
+; no sincos call is formed here.
+define void @sincos_f32_preserve_fpmath_0(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_0
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !fpmath !5
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]), !fpmath !5
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x), !fpmath !5
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x), !fpmath !5
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; The sin call carries `!fpmath !5` and the cos call carries `!fpmath !6`
+; (different metadata nodes). The expected output keeps both calls and their
+; respective metadata unchanged.
+define void @sincos_f32_preserve_fpmath_1(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_preserve_fpmath_1
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !fpmath !5
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]), !fpmath !6
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x), !fpmath !5
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x), !fpmath !6
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; Should drop the metadata
+; Only the sin call carries `!fpmath !5`; the cos call has no fpmath
+; metadata. The expected output recorded below keeps both calls unchanged,
+; with the metadata still attached to the sin call.
+define void @sincos_f32_drop_fpmath(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_drop_fpmath
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !fpmath !5
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x), !fpmath !5
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float %x)
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+; sin/cos pair on %x where every instruction has a !dbg location and each
+; call result is described by an llvm.dbg.value intrinsic. The expected
+; output keeps both calls, the dbg.value intrinsics and the locations; the
+; metadata ids are captured as FileCheck variables rather than fixed numbers.
+define void @sincos_f32_debuginfo(float %x, ptr addrspace(1) nocapture writeonly %sin_out, ptr addrspace(1) nocapture writeonly %cos_out) !dbg !15 {
+; CHECK-LABEL: define void @sincos_f32_debuginfo
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR2]] !dbg [[DBG7:![0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float [[X]]), !dbg [[DBG14:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata float [[CALL]], metadata [[META11:![0-9]+]], metadata !DIExpression()), !dbg [[DBG14]]
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4, !dbg [[DBG15:![0-9]+]]
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float [[X]]), !dbg [[DBG16:![0-9]+]]
+; CHECK-NEXT:    call void @llvm.dbg.value(metadata float [[CALL1]], metadata [[META13:![0-9]+]], metadata !DIExpression()), !dbg [[DBG16]]
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4, !dbg [[DBG17:![0-9]+]]
+; CHECK-NEXT:    ret void, !dbg [[DBG18:![0-9]+]]
+;
+entry:
+  %call = tail call contract float @_Z3sinf(float %x), !dbg !19
+  call void @llvm.dbg.value(metadata float %call, metadata !17, metadata !DIExpression()), !dbg !19
+  store float %call, ptr addrspace(1) %sin_out, align 4, !dbg !20
+  %call1 = tail call contract float @_Z3cosf(float %x), !dbg !21
+  call void @llvm.dbg.value(metadata float %call1, metadata !18, metadata !DIExpression()), !dbg !21
+  store float %call1, ptr addrspace(1) %cos_out, align 4, !dbg !22
+  ret void, !dbg !23
+}
+
+; A plain sin call on %x followed by an already-present sincos call on the
+; same %x that writes cos through an addrspace(5) pointer. The two calls
+; differ in fast-math flags (`nsz ninf nnan contract` vs `contract nnan`) and
+; in fpmath metadata (!5 vs !6). The expected output keeps both calls.
+define float @sin_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @sin_sincos_private_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[SIN0:%.*]] = tail call nnan ninf nsz contract float @_Z3sinf(float [[X]]), !fpmath !5
+; CHECK-NEXT:    store float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[SIN1:%.*]] = call nnan contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP]]), !fpmath !6
+; CHECK-NEXT:    [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4
+; CHECK-NEXT:    store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp = alloca float, addrspace(5)
+  %sin0 = tail call nsz ninf nnan contract float @_Z3sinf(float %x), !fpmath !5
+  store float %sin0, ptr addrspace(1) %sin_out, align 4
+  %sin1 = call contract nnan float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %cos.tmp), !fpmath !6
+  %cos1 = load float, ptr addrspace(5) %cos.tmp
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; Same as the private-pointer variant, except the existing sincos call takes
+; a generic pointer obtained by addrspacecast of the addrspace(5) alloca.
+; The sin call has `nsz contract` / !fpmath !5, the sincos call has
+; `contract nsz ninf` / !fpmath !6. The expected output keeps both calls.
+define float @sin_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @sin_sincos_generic_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[SIN0:%.*]] = tail call nsz contract float @_Z3sinf(float [[X]]), !fpmath !5
+; CHECK-NEXT:    store float [[SIN0]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[COS_TMP_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP]] to ptr
+; CHECK-NEXT:    [[SIN1:%.*]] = call ninf nsz contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP_CAST]]), !fpmath !6
+; CHECK-NEXT:    [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4
+; CHECK-NEXT:    store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp = alloca float, addrspace(5)
+  %sin0 = tail call nsz contract float @_Z3sinf(float %x), !fpmath !5
+  store float %sin0, ptr addrspace(1) %sin_out, align 4
+  %cos.tmp.cast = addrspacecast ptr addrspace(5) %cos.tmp to ptr
+  %sin1 = call contract nsz ninf float @_Z6sincosfPU3AS0f(float %x, ptr %cos.tmp.cast), !fpmath !6
+  %cos1 = load float, ptr addrspace(5) %cos.tmp
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; A plain cos call on %x followed by an already-present sincos call on the
+; same %x writing through an addrspace(5) pointer. %sin_out is never used
+; (the CHECK-SAME line shows it as `readnone`). The expected output keeps
+; both the cos call and the sincos call.
+define float @cos_sincos_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @cos_sincos_private_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[SIN1:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP]])
+; CHECK-NEXT:    [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4
+; CHECK-NEXT:    store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp = alloca float, addrspace(5)
+  %cos0 = tail call contract float @_Z3cosf(float %x)
+  store float %cos0, ptr addrspace(1) %cos_out, align 4
+  %sin1 = call contract float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %cos.tmp)
+  %cos1 = load float, ptr addrspace(5) %cos.tmp
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; Generic-pointer variant of the cos + existing-sincos case: the sincos call
+; receives an addrspacecast of the addrspace(5) alloca. The expected output
+; keeps both the cos call and the sincos call.
+define float @cos_sincos_generic_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @cos_sincos_generic_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS0:%.*]] = tail call contract float @_Z3cosf(float [[X]])
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[COS_TMP_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP]] to ptr
+; CHECK-NEXT:    [[SIN1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP_CAST]])
+; CHECK-NEXT:    [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP]], align 4
+; CHECK-NEXT:    store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp = alloca float, addrspace(5)
+  %cos0 = tail call contract float @_Z3cosf(float %x)
+  store float %cos0, ptr addrspace(1) %cos_out, align 4
+  %cos.tmp.cast = addrspacecast ptr addrspace(5) %cos.tmp to ptr
+  %sin1 = call contract float @_Z6sincosfPU3AS0f(float %x, ptr %cos.tmp.cast)
+  %cos1 = load float, ptr addrspace(5) %cos.tmp
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; Two already-present sincos calls on the same %x, each writing cos to its
+; own addrspace(5) alloca, with the cos value loaded and stored after each.
+; The expected output keeps both sincos calls and both loads.
+define float @sincos_private_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @sincos_private_f32_x2
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[SIN0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP0]])
+; CHECK-NEXT:    [[COS0:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[SIN1:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP1]])
+; CHECK-NEXT:    [[COS1:%.*]] = load float, ptr addrspace(5) [[COS_TMP1]], align 4
+; CHECK-NEXT:    store float [[COS1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp0 = alloca float, addrspace(5)
+  %cos.tmp1 = alloca float, addrspace(5)
+
+  %sin0 = call contract float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %cos.tmp0)
+  %cos0 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos0, ptr addrspace(1) %cos_out, align 4
+
+  %sin1 = call contract float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %cos.tmp1)
+  %cos1 = load float, ptr addrspace(5) %cos.tmp1
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; Two already-present sincos calls on the same %x, each taking a generic
+; pointer cast from its own addrspace(5) alloca. The expected output keeps
+; both sincos calls.
+; NOTE(review): %cos1 is loaded from %cos.tmp0, not %cos.tmp1 (unlike
+; @sincos_private_f32_x2), so the expected output stores [[COS0]] twice and
+; never reads the second slot. Possibly a copy-paste slip -- confirm intent
+; and regenerate the CHECK lines if the load is changed.
+define float @sincos_generic_f32_x2(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @sincos_generic_f32_x2
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS_TMP0_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP0]] to ptr
+; CHECK-NEXT:    [[SIN0:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP0_CAST]])
+; CHECK-NEXT:    [[COS0:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[COS_TMP1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP1]] to ptr
+; CHECK-NEXT:    [[SIN1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP1_CAST]])
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp0 = alloca float, addrspace(5)
+  %cos.tmp1 = alloca float, addrspace(5)
+  %cos.tmp0.cast = addrspacecast ptr addrspace(5) %cos.tmp0 to ptr
+  %sin0 = call contract float @_Z6sincosfPU3AS0f(float %x, ptr %cos.tmp0.cast)
+  %cos0 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos0, ptr addrspace(1) %cos_out, align 4
+  %cos.tmp1.cast = addrspacecast ptr addrspace(5) %cos.tmp1 to ptr
+  %sin1 = call contract float @_Z6sincosfPU3AS0f(float %x, ptr %cos.tmp1.cast)
+  %cos1 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; Two already-present sincos calls on the same %x using different pointer
+; address spaces: the first writes through an addrspace(5) pointer, the
+; second through a generic pointer cast from a second alloca. The expected
+; output keeps both sincos calls.
+; NOTE(review): %cos1 is loaded from %cos.tmp0, not %cos.tmp1, so the
+; expected output stores [[COS0]] twice and never reads the second slot.
+; Possibly a copy-paste slip -- confirm intent and regenerate the CHECK lines
+; if the load is changed.
+define float @sincos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @sincos_generic_private_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture readnone [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[SIN0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP0]])
+; CHECK-NEXT:    [[COS0:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[COS_TMP1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP1]] to ptr
+; CHECK-NEXT:    [[SIN1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP1_CAST]])
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN1]]
+;
+entry:
+  %cos.tmp0 = alloca float, addrspace(5)
+  %cos.tmp1 = alloca float, addrspace(5)
+  %sin0 = call contract float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %cos.tmp0)
+  %cos0 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos0, ptr addrspace(1) %cos_out, align 4
+  %cos.tmp1.cast = addrspacecast ptr addrspace(5) %cos.tmp1 to ptr
+  %sin1 = call contract float @_Z6sincosfPU3AS0f(float %x, ptr %cos.tmp1.cast)
+  %cos1 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  ret float %sin1
+}
+
+; Mixes all call kinds on the same %x: an existing private-pointer sincos
+; call, a plain sin call, a plain cos call, and an existing generic-pointer
+; sincos call. Expected output: the plain sin/cos pair is replaced by a third
+; sincos call with its own new alloca (sin from the return value, cos loaded
+; from that alloca), while the two original sincos calls are kept.
+; NOTE(review): %cos2 is loaded from %cos.tmp0, not %cos.tmp1, so the final
+; store in the expected output reuses [[COS0]]. Possibly a copy-paste slip --
+; confirm intent and regenerate the CHECK lines if the load is changed.
+define float @sincos_mixed_sin_cos_generic_private_f32(float %x, ptr addrspace(1) %sin_out, ptr addrspace(1) %cos_out) {
+; CHECK-LABEL: define float @sincos_mixed_sin_cos_generic_private_f32
+; CHECK-SAME: (float [[X:%.*]], ptr addrspace(1) nocapture writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture writeonly [[COS_OUT:%.*]]) local_unnamed_addr #[[ATTR5]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[__SINCOS_SIN1:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS_TMP0:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[COS_TMP1:%.*]] = alloca float, align 4, addrspace(5)
+; CHECK-NEXT:    [[SIN0:%.*]] = call contract float @_Z6sincosfPU3AS5f(float [[X]], ptr addrspace(5) [[COS_TMP0]])
+; CHECK-NEXT:    [[TMP0:%.*]] = addrspacecast ptr addrspace(5) [[__SINCOS_SIN1]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[TMP0]])
+; CHECK-NEXT:    store float [[TMP1]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[COS0:%.*]] = load float, ptr addrspace(5) [[COS_TMP0]], align 4
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[COS_TMP1_CAST:%.*]] = addrspacecast ptr addrspace(5) [[COS_TMP1]] to ptr
+; CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr addrspace(5) [[__SINCOS_SIN1]], align 4
+; CHECK-NEXT:    store float [[TMP2]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    [[SIN2:%.*]] = call contract float @_Z6sincosfPU3AS0f(float [[X]], ptr [[COS_TMP1_CAST]])
+; CHECK-NEXT:    store float [[COS0]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret float [[SIN2]]
+;
+entry:
+  %cos.tmp0 = alloca float, addrspace(5)
+  %cos.tmp1 = alloca float, addrspace(5)
+  %sin0 = call contract float @_Z6sincosfPU3AS5f(float %x, ptr addrspace(5) %cos.tmp0)
+  %sin1 = call contract float @_Z3sinf(float %x)
+  store float %sin1, ptr addrspace(1) %sin_out
+  %cos0 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos0, ptr addrspace(1) %cos_out, align 4
+  %cos.tmp1.cast = addrspacecast ptr addrspace(5) %cos.tmp1 to ptr
+  %cos1 = call contract float @_Z3cosf(float %x)
+  store float %cos1, ptr addrspace(1) %cos_out, align 4
+  %sin2 = call contract float @_Z6sincosfPU3AS0f(float %x, ptr %cos.tmp1.cast)
+  %cos2 = load float, ptr addrspace(5) %cos.tmp0
+  store float %cos2, ptr addrspace(1) %cos_out, align 4
+  ret float %sin2
+}
+
+; Debug-value intrinsic called by @sincos_f32_debuginfo.
+declare void @llvm.dbg.value(metadata, metadata, metadata) #0
+
+; #0 is attached to the llvm.dbg.value declaration above.
+; NOTE(review): #1 has no user in the part of the file reviewed here;
+; presumably it is attached to declarations earlier in the file -- confirm.
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #1 = { argmemonly nounwind willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.debugify = !{!2, !3}
+!llvm.module.flags = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "debugify", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "/tmp/arst.ll", directory: "/")
+!2 = !{i32 10}
+!3 = !{i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+; !5 and !6 are the two distinct !fpmath operands used by the fpmath and
+; sin_sincos tests.
+!5 = !{float 4.000000e+00}
+!6 = !{float 2.000000e+00}
+; NOTE(review): !7, !8, !11, !12 and !14 describe a subprogram named
+; "sincos_f32_preserve_fpmath_1", but that function carries no !dbg
+; attachment in the IR above -- confirm whether these nodes are still needed.
+!7 = !DILocation(line: 5, column: 1, scope: !8)
+!8 = distinct !DISubprogram(name: "sincos_f32_preserve_fpmath_1", linkageName: "sincos_f32_preserve_fpmath_1", scope: null, file: !1, line: 1, type: !9, scopeLine: 1, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !11)
+!9 = !DISubroutineType(types: !10)
+!10 = !{}
+!11 = !{!12, !14}
+!12 = !DILocalVariable(name: "1", scope: !8, file: !1, line: 1, type: !13)
+!13 = !DIBasicType(name: "ty32", size: 32, encoding: DW_ATE_unsigned)
+!14 = !DILocalVariable(name: "2", scope: !8, file: !1, line: 3, type: !13)
+; !15 is the subprogram attached to @sincos_f32_debuginfo; !17/!18 are the
+; variables named by its dbg.value calls and !19-!23 are its instruction
+; locations.
+!15 = distinct !DISubprogram(name: "sincos_f32_debuginfo", linkageName: "sincos_f32_debuginfo", scope: null, file: !1, line: 6, type: !9, scopeLine: 6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!16 = !{!17, !18}
+!17 = !DILocalVariable(name: "3", scope: !15, file: !1, line: 6, type: !13)
+!18 = !DILocalVariable(name: "4", scope: !15, file: !1, line: 8, type: !13)
+!19 = !DILocation(line: 6, column: 1, scope: !15)
+!20 = !DILocation(line: 7, column: 1, scope: !15)
+!21 = !DILocation(line: 8, column: 1, scope: !15)
+!22 = !DILocation(line: 9, column: 1, scope: !15)
+!23 = !DILocation(line: 10, column: 1, scope: !15)

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll
new file mode 100644
index 00000000000000..94d68fe7f6ac33
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.nobuiltins.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals --version 2
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib -amdgpu-prelink < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare float @_Z3sinf(float noundef)
+declare float @_Z3cosf(float noundef)
+declare <2 x float> @_Z3sinDv2_f(<2 x float> noundef)
+declare <2 x float> @_Z3cosDv2_f(<2 x float> noundef)
+
+define void @sincos_f32_nobuiltin(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #0 {
+; CHECK: Function Attrs: nobuiltin
+; CHECK-LABEL: define void @sincos_f32_nobuiltin
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Scalar f32 case where the caller itself is marked nobuiltin (attribute group #0).
+  %call = tail call contract float @_Z3sinf(float noundef %x) ; sin(%x); the assertions above expect this call to come out unchanged
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x) ; cos of the same %x; also expected to come out unchanged
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_v2f32_nobuiltin(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #0 {
+; CHECK: Function Attrs: nobuiltin
+; CHECK-LABEL: define void @sincos_v2f32_nobuiltin
+; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry: ; <2 x float> case where the caller itself is marked nobuiltin (attribute group #0).
+  %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x) ; vector sin; the assertions above expect this call to come out unchanged
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x) ; vector cos of the same %x; also expected to come out unchanged
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+define void @sincos_f32_no_builtins(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #1 {
+; CHECK-LABEL: define void @sincos_f32_no_builtins
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Scalar f32 case where the caller carries the string attribute "no-builtins" (attribute group #1).
+  %call = tail call contract float @_Z3sinf(float noundef %x) ; sin(%x); the assertions above expect this call to come out unchanged
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x) ; cos of the same %x; also expected to come out unchanged
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_v2f32_no_builtins(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) #1 {
+; CHECK-LABEL: define void @sincos_v2f32_no_builtins
+; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]])
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry: ; <2 x float> case where the caller carries the string attribute "no-builtins" (attribute group #1).
+  %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x) ; vector sin; the assertions above expect this call to come out unchanged
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x) ; vector cos of the same %x; also expected to come out unchanged
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+define void @sincos_f32_nobuiltin_callsite(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_nobuiltin_callsite
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Scalar f32 case: the caller has no attribute group; nobuiltin (#0) sits on both call sites instead.
+  %call = tail call contract float @_Z3sinf(float noundef %x) #0 ; sin call site marked nobuiltin; expected to come out unchanged, attribute included
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x) #0 ; cos call site marked nobuiltin; expected to come out unchanged, attribute included
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_nobuiltin_callsite0(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_nobuiltin_callsite0
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Mixed case: only the sin call site is marked nobuiltin (#0); the cos call site is unannotated.
+  %call = tail call contract float @_Z3sinf(float noundef %x) #0 ; nobuiltin on this call site only
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x) ; no call-site attribute; the assertions above still expect it to come out unchanged
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_nobuiltin_callsite1(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_nobuiltin_callsite1
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Mixed case: only the cos call site is marked nobuiltin (#0); the sin call site is unannotated.
+  %call = tail call contract float @_Z3sinf(float noundef %x) ; no call-site attribute; the assertions above still expect it to come out unchanged
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x) #0 ; nobuiltin on this call site only
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_v2f32_nobuiltin_callsite(<2 x float> noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_v2f32_nobuiltin_callsite
+; CHECK-SAME: (<2 x float> noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store <2 x float> [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 8
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef [[X]]) #[[ATTR0]]
+; CHECK-NEXT:    store <2 x float> [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 8
+; CHECK-NEXT:    ret void
+;
+entry: ; <2 x float> case: the caller has no attribute group; nobuiltin (#0) sits on both call sites instead.
+  %call = tail call contract <2 x float> @_Z3sinDv2_f(<2 x float> noundef %x) #0 ; vector sin call site marked nobuiltin; expected to come out unchanged, attribute included
+  store <2 x float> %call, ptr addrspace(1) %sin_out, align 8
+  %call1 = tail call contract <2 x float> @_Z3cosDv2_f(<2 x float> noundef %x) #0 ; vector cos call site marked nobuiltin; expected to come out unchanged, attribute included
+  store <2 x float> %call1, ptr addrspace(1) %cos_out, align 8
+  ret void
+}
+
+; TODO: Handle single function forms
+attributes #0 = { nobuiltin }
+attributes #1 = { "no-builtins" }
+;.
+; CHECK: attributes #[[ATTR0]] = { nobuiltin }
+; CHECK: attributes #[[ATTR1]] = { "no-builtins" }
+;.

diff  --git a/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll
new file mode 100644
index 00000000000000..ea67756010358b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-sincos.weak.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-simplifylib < %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8"
+
+declare extern_weak float @_Z3sinf(float noundef)
+declare extern_weak float @_Z3cosf(float noundef)
+
+define void @sincos_f32(float noundef %x, ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32
+; CHECK-SAME: (float noundef [[X:%.*]], ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float noundef [[X]])
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Scalar f32 case where @_Z3sinf and @_Z3cosf are declared extern_weak in this file.
+  %call = tail call contract float @_Z3sinf(float noundef %x) ; call to the extern_weak sin; the assertions above expect it to come out unchanged
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float noundef %x) ; call to the extern_weak cos on the same %x; also expected to come out unchanged
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}
+
+define void @sincos_f32_value_is_same_constantfp(ptr addrspace(1) nocapture noundef writeonly %sin_out, ptr addrspace(1) nocapture noundef writeonly %cos_out) {
+; CHECK-LABEL: define void @sincos_f32_value_is_same_constantfp
+; CHECK-SAME: (ptr addrspace(1) nocapture noundef writeonly [[SIN_OUT:%.*]], ptr addrspace(1) nocapture noundef writeonly [[COS_OUT:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CALL:%.*]] = tail call contract float @_Z3sinf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL]], ptr addrspace(1) [[SIN_OUT]], align 4
+; CHECK-NEXT:    [[CALL1:%.*]] = tail call contract float @_Z3cosf(float 4.200000e+01)
+; CHECK-NEXT:    store float [[CALL1]], ptr addrspace(1) [[COS_OUT]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; Both extern_weak callees receive the same floating-point constant instead of a function argument.
+  %call = tail call contract float @_Z3sinf(float 42.0) ; sin of the constant; the assertions above expect the call kept, with the constant printed as 4.200000e+01
+  store float %call, ptr addrspace(1) %sin_out, align 4
+  %call1 = tail call contract float @_Z3cosf(float 42.0) ; cos of the same constant; also expected to be kept
+  store float %call1, ptr addrspace(1) %cos_out, align 4
+  ret void
+}


        


More information about the llvm-commits mailing list