[llvm] Change `half` to use soft promotion rather than `PromoteFloat` (PR #152727)
Trevor Gross via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 8 07:10:16 PDT 2025
https://github.com/tgross35 created https://github.com/llvm/llvm-project/pull/152727
`half` currently uses the default legalization of promoting to an `f32`; however, this legalization performs arithmetic in a way that results in incorrect rounding. Switch to the soft promote implementation, which does not have this problem.
The SPARC ABI does not specify a `_Float16` type, so there is no concern with keeping interface compatibility.
Fixes the SPARC portion of [1].
[1]: https://github.com/llvm/llvm-project/issues/97975
>From 6d3bf6d9963dc6c4c6efe59e2ffcfec02dc11b46 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Fri, 8 Aug 2025 08:03:18 -0500
Subject: [PATCH 1/2] [SPARC] Add a test for half support (NFC)
To prepare for updates to SPARC `half` support, add a version of the
`half.ll` test that is present on other targets.
---
llvm/test/CodeGen/SPARC/half.ll | 868 ++++++++++++++++++++++++++++++++
1 file changed, 868 insertions(+)
create mode 100644 llvm/test/CodeGen/SPARC/half.ll
diff --git a/llvm/test/CodeGen/SPARC/half.ll b/llvm/test/CodeGen/SPARC/half.ll
new file mode 100644
index 0000000000000..e1e0c7cbd24e9
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/half.ll
@@ -0,0 +1,868 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+
+; RUN: llc %s -o - -mtriple=sparc-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC32
+; RUN: llc %s -o - -mtriple=sparc64-unknown-linux-gnu | FileCheck %s --check-prefixes=SPARC64
+
+; Tests for various operations on half precision float. Much of the test is
+; copied from test/CodeGen/X86/half.ll.
+
+define void @store(half %x, ptr %p) nounwind {
+; SPARC32-LABEL: store:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: sth %o0, [%i1]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: store:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sth %o0, [%i1]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ store half %x, ptr %p
+ ret void
+}
+
+define half @return(ptr %p) nounwind {
+; SPARC32-LABEL: return:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: return:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %r = load half, ptr %p
+ ret half %r
+}
+
+define dso_local double @loadd(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; SPARC32-LABEL: loadd:
+; SPARC32: ! %bb.0: ! %entry
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+2], %o0
+; SPARC32-NEXT: fstod %f0, %f0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: loadd:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+2], %o0
+; SPARC64-NEXT: fstod %f0, %f0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+entry:
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+ %0 = load i16, ptr %arrayidx, align 2
+ %1 = tail call double @llvm.convert.from.fp16.f64(i16 %0)
+ ret double %1
+}
+
+define dso_local float @loadf(ptr nocapture readonly %a) local_unnamed_addr nounwind {
+; SPARC32-LABEL: loadf:
+; SPARC32: ! %bb.0: ! %entry
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+2], %o0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: loadf:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+2], %o0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+entry:
+ %arrayidx = getelementptr inbounds i16, ptr %a, i64 1
+ %0 = load i16, ptr %arrayidx, align 2
+ %1 = tail call float @llvm.convert.from.fp16.f32(i16 %0)
+ ret float %1
+}
+
+define dso_local void @stored(ptr nocapture %a, double %b) local_unnamed_addr nounwind {
+; SPARC32-LABEL: stored:
+; SPARC32: ! %bb.0: ! %entry
+; SPARC32-NEXT: save %sp, -112, %sp
+; SPARC32-NEXT: mov %i2, %i3
+; SPARC32-NEXT: mov %i1, %i2
+; SPARC32-NEXT: std %i2, [%fp+-8]
+; SPARC32-NEXT: ldd [%fp+-8], %f0
+; SPARC32-NEXT: std %f0, [%fp+-16]
+; SPARC32-NEXT: call __truncdfhf2
+; SPARC32-NEXT: ldd [%fp+-16], %o0
+; SPARC32-NEXT: sth %o0, [%i0]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: stored:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: fmovd %f2, %f0
+; SPARC64-NEXT: call __truncdfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sth %o0, [%i0]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f64(double %b)
+ store i16 %0, ptr %a, align 2
+ ret void
+}
+
+define dso_local void @storef(ptr nocapture %a, float %b) local_unnamed_addr nounwind {
+; SPARC32-LABEL: storef:
+; SPARC32: ! %bb.0: ! %entry
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i1, %o0
+; SPARC32-NEXT: sth %o0, [%i0]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: storef:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: fmovs %f3, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sth %o0, [%i0]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+entry:
+ %0 = tail call i16 @llvm.convert.to.fp16.f32(float %b)
+ store i16 %0, ptr %a, align 2
+ ret void
+}
+
+define void @test_load_store(ptr %in, ptr %out) nounwind {
+; SPARC32-LABEL: test_load_store:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: lduh [%o0], %o0
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: sth %o0, [%o1]
+;
+; SPARC64-LABEL: test_load_store:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: lduh [%o0], %o0
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: sth %o0, [%o1]
+ %val = load half, ptr %in
+ store half %val, ptr %out
+ ret void
+}
+
+define i16 @test_bitcast_from_half(ptr %addr) nounwind {
+; SPARC32-LABEL: test_bitcast_from_half:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: lduh [%o0], %o0
+;
+; SPARC64-LABEL: test_bitcast_from_half:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: lduh [%o0], %o0
+ %val = load half, ptr %addr
+ %val_int = bitcast half %val to i16
+ ret i16 %val_int
+}
+
+define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
+; SPARC32-LABEL: test_bitcast_to_half:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: sth %o1, [%o0]
+;
+; SPARC64-LABEL: test_bitcast_to_half:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: sth %o1, [%o0]
+ %val_fp = bitcast i16 %in to half
+ store half %val_fp, ptr %addr
+ ret void
+}
+
+define half @from_bits(i16 %x) nounwind {
+; SPARC32-LABEL: from_bits:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: from_bits:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: srl %i0, 0, %o0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %res = bitcast i16 %x to half
+ ret half %res
+}
+
+define i16 @to_bits(half %x) nounwind {
+; SPARC32-LABEL: to_bits:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: sethi 4194240, %i0
+; SPARC32-NEXT: andn %o0, %i0, %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: to_bits:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sethi 63, %i0
+; SPARC64-NEXT: or %i0, 1023, %i0
+; SPARC64-NEXT: and %o0, %i0, %i0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %res = bitcast half %x to i16
+ ret i16 %res
+}
+
+define float @test_extend32(ptr %addr) nounwind {
+; SPARC32-LABEL: test_extend32:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_extend32:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to float
+ ret float %val32
+}
+
+define double @test_extend64(ptr %addr) nounwind {
+; SPARC32-LABEL: test_extend64:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: fstod %f0, %f0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_extend64:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: fstod %f0, %f0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %val16 = load half, ptr %addr
+ %val32 = fpext half %val16 to double
+ ret double %val32
+}
+
+define void @test_trunc32(float %in, ptr %addr) nounwind {
+; SPARC32-LABEL: test_trunc32:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: sth %o0, [%i1]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_trunc32:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sth %o0, [%i1]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %val16 = fptrunc float %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+
+define void @test_trunc64(double %in, ptr %addr) nounwind {
+; SPARC32-LABEL: test_trunc64:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -112, %sp
+; SPARC32-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 def $i0_i1
+; SPARC32-NEXT: ! kill: def $i0 killed $i0 killed $i0_i1 def $i0_i1
+; SPARC32-NEXT: std %i0, [%fp+-8]
+; SPARC32-NEXT: ldd [%fp+-8], %f0
+; SPARC32-NEXT: std %f0, [%fp+-16]
+; SPARC32-NEXT: call __truncdfhf2
+; SPARC32-NEXT: ldd [%fp+-16], %o0
+; SPARC32-NEXT: sth %o0, [%i2]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_trunc64:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __truncdfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sth %o0, [%i1]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %val16 = fptrunc double %in to half
+ store half %val16, ptr %addr
+ ret void
+}
+
+define i64 @test_fptosi_i64(ptr %p) nounwind {
+; SPARC32-LABEL: test_fptosi_i64:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __fixsfdi
+; SPARC32-NEXT: ld [%fp+-4], %o0
+; SPARC32-NEXT: mov %o0, %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o1, %o1
+;
+; SPARC64-LABEL: test_fptosi_i64:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: fstox %f0, %f0
+; SPARC64-NEXT: std %f0, [%fp+2039]
+; SPARC64-NEXT: ldx [%fp+2039], %i0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %a = load half, ptr %p, align 2
+ %r = fptosi half %a to i64
+ ret i64 %r
+}
+
+define void @test_sitofp_i64(i64 %a, ptr %p) nounwind {
+; SPARC32-LABEL: test_sitofp_i64:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: call __floatdisf
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-4], %o0
+; SPARC32-NEXT: sth %o0, [%i2]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_sitofp_i64:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: stx %i0, [%fp+2039]
+; SPARC64-NEXT: ldd [%fp+2039], %f0
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: fxtos %f0, %f1
+; SPARC64-NEXT: sth %o0, [%i1]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %r = sitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
+
+define i64 @test_fptoui_i64(ptr %p) nounwind {
+; SPARC32-LABEL: test_fptoui_i64:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __fixunssfdi
+; SPARC32-NEXT: ld [%fp+-4], %o0
+; SPARC32-NEXT: mov %o0, %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o1, %o1
+;
+; SPARC64-LABEL: test_fptoui_i64:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: sethi %h44(.LCPI17_0), %i0
+; SPARC64-NEXT: add %i0, %m44(.LCPI17_0), %i0
+; SPARC64-NEXT: sllx %i0, 12, %i0
+; SPARC64-NEXT: ld [%i0+%l44(.LCPI17_0)], %f1
+; SPARC64-NEXT: fsubs %f0, %f1, %f2
+; SPARC64-NEXT: fstox %f2, %f2
+; SPARC64-NEXT: std %f2, [%fp+2031]
+; SPARC64-NEXT: fstox %f0, %f2
+; SPARC64-NEXT: std %f2, [%fp+2039]
+; SPARC64-NEXT: ldx [%fp+2031], %i0
+; SPARC64-NEXT: sethi 0, %i1
+; SPARC64-NEXT: or %i1, 0, %i1
+; SPARC64-NEXT: sethi 2097152, %i2
+; SPARC64-NEXT: or %i2, 0, %i2
+; SPARC64-NEXT: sllx %i2, 32, %i2
+; SPARC64-NEXT: ldx [%fp+2039], %i3
+; SPARC64-NEXT: or %i2, %i1, %i1
+; SPARC64-NEXT: xor %i0, %i1, %i0
+; SPARC64-NEXT: fcmps %fcc0, %f0, %f1
+; SPARC64-NEXT: movl %fcc0, %i3, %i0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %a = load half, ptr %p, align 2
+ %r = fptoui half %a to i64
+ ret i64 %r
+}
+
+define void @test_uitofp_i64(i64 %a, ptr %p) nounwind {
+; SPARC32-LABEL: test_uitofp_i64:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: call __floatundisf
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-4], %o0
+; SPARC32-NEXT: sth %o0, [%i2]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_uitofp_i64:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: stx %i0, [%fp+2031]
+; SPARC64-NEXT: srlx %i0, 1, %i2
+; SPARC64-NEXT: and %i0, 1, %i3
+; SPARC64-NEXT: or %i3, %i2, %i2
+; SPARC64-NEXT: stx %i2, [%fp+2039]
+; SPARC64-NEXT: ldd [%fp+2031], %f0
+; SPARC64-NEXT: ldd [%fp+2039], %f2
+; SPARC64-NEXT: fxtos %f0, %f1
+; SPARC64-NEXT: fxtos %f2, %f0
+; SPARC64-NEXT: fadds %f0, %f0, %f0
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: fmovrslz %i0, %f0, %f1
+; SPARC64-NEXT: sth %o0, [%i1]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %r = uitofp i64 %a to half
+ store half %r, ptr %p
+ ret void
+}
+
+define <4 x float> @test_extend32_vec4(ptr %p) nounwind {
+; SPARC32-LABEL: test_extend32_vec4:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -104, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: st %f0, [%fp+-4] ! 4-byte Folded Spill
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+2], %o0
+; SPARC32-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+4], %o0
+; SPARC32-NEXT: st %f0, [%fp+-12] ! 4-byte Folded Spill
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+6], %o0
+; SPARC32-NEXT: fmovs %f0, %f3
+; SPARC32-NEXT: ld [%fp+-4], %f0 ! 4-byte Folded Reload
+; SPARC32-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
+; SPARC32-NEXT: ld [%fp+-12], %f2 ! 4-byte Folded Reload
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_extend32_vec4:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: st %f0, [%fp+2043] ! 4-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+2], %o0
+; SPARC64-NEXT: st %f0, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+4], %o0
+; SPARC64-NEXT: st %f0, [%fp+2035] ! 4-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+6], %o0
+; SPARC64-NEXT: fmovs %f0, %f3
+; SPARC64-NEXT: ld [%fp+2043], %f0 ! 4-byte Folded Reload
+; SPARC64-NEXT: ld [%fp+2039], %f1 ! 4-byte Folded Reload
+; SPARC64-NEXT: ld [%fp+2035], %f2 ! 4-byte Folded Reload
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %a = load <4 x half>, ptr %p, align 8
+ %b = fpext <4 x half> %a to <4 x float>
+ ret <4 x float> %b
+}
+
+define <4 x double> @test_extend64_vec4(ptr %p) nounwind {
+; SPARC32-LABEL: test_extend64_vec4:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -120, %sp
+; SPARC32-NEXT: ld [%fp+64], %i1
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+4], %o0
+; SPARC32-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0+2], %o0
+; SPARC32-NEXT: st %f0, [%fp+-16] ! 4-byte Folded Spill
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i0], %o0
+; SPARC32-NEXT: lduh [%i0+6], %o0
+; SPARC32-NEXT: fstod %f0, %f0
+; SPARC32-NEXT: std %f0, [%fp+-24] ! 8-byte Folded Spill
+; SPARC32-NEXT: ld [%fp+-16], %f0 ! 4-byte Folded Reload
+; SPARC32-NEXT: fstod %f0, %f0
+; SPARC32-NEXT: std %f0, [%fp+-16] ! 8-byte Folded Spill
+; SPARC32-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload
+; SPARC32-NEXT: fstod %f0, %f0
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: std %f0, [%fp+-8]
+; SPARC32-NEXT: fstod %f0, %f0
+; SPARC32-NEXT: std %f0, [%i1+24]
+; SPARC32-NEXT: ldd [%fp+-8], %f0 ! 8-byte Folded Reload
+; SPARC32-NEXT: std %f0, [%i1+16]
+; SPARC32-NEXT: ldd [%fp+-16], %f0 ! 8-byte Folded Reload
+; SPARC32-NEXT: std %f0, [%i1+8]
+; SPARC32-NEXT: ldd [%fp+-24], %f0 ! 8-byte Folded Reload
+; SPARC32-NEXT: std %f0, [%i1]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_extend64_vec4:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -208, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: fstod %f0, %f0
+; SPARC64-NEXT: std %f0, [%fp+2039] ! 8-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+2], %o0
+; SPARC64-NEXT: fstod %f0, %f0
+; SPARC64-NEXT: std %f0, [%fp+2031] ! 8-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+4], %o0
+; SPARC64-NEXT: fstod %f0, %f0
+; SPARC64-NEXT: std %f0, [%fp+2023] ! 8-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i0+6], %o0
+; SPARC64-NEXT: fstod %f0, %f6
+; SPARC64-NEXT: ldd [%fp+2039], %f0 ! 8-byte Folded Reload
+; SPARC64-NEXT: ldd [%fp+2031], %f2 ! 8-byte Folded Reload
+; SPARC64-NEXT: ldd [%fp+2023], %f4 ! 8-byte Folded Reload
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %a = load <4 x half>, ptr %p, align 8
+ %b = fpext <4 x half> %a to <4 x double>
+ ret <4 x double> %b
+}
+
+define void @test_trunc32_vec4(<4 x float> %a, ptr %p) nounwind {
+; SPARC32-LABEL: test_trunc32_vec4:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: mov %o0, %i0
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i1, %o0
+; SPARC32-NEXT: mov %o0, %i1
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i2, %o0
+; SPARC32-NEXT: mov %o0, %i2
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i3, %o0
+; SPARC32-NEXT: sth %o0, [%i4+6]
+; SPARC32-NEXT: sth %i2, [%i4+4]
+; SPARC32-NEXT: sth %i1, [%i4+2]
+; SPARC32-NEXT: sth %i0, [%i4]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_trunc32_vec4:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: st %f7, [%fp+2043] ! 4-byte Folded Spill
+; SPARC64-NEXT: st %f5, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: st %f3, [%fp+2035]
+; SPARC64-NEXT: mov %o0, %i0
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: ld [%fp+2035], %f1
+; SPARC64-NEXT: mov %o0, %i1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: ld [%fp+2039], %f1
+; SPARC64-NEXT: mov %o0, %i2
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: ld [%fp+2043], %f1
+; SPARC64-NEXT: sth %o0, [%i4+6]
+; SPARC64-NEXT: sth %i2, [%i4+4]
+; SPARC64-NEXT: sth %i1, [%i4+2]
+; SPARC64-NEXT: sth %i0, [%i4]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %v = fptrunc <4 x float> %a to <4 x half>
+ store <4 x half> %v, ptr %p
+ ret void
+}
+
+define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
+; SPARC32-LABEL: test_trunc64_vec4:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -160, %sp
+; SPARC32-NEXT: ld [%fp+92], %g2
+; SPARC32-NEXT: ld [%fp+96], %g3
+; SPARC32-NEXT: ! kill: def $i5 killed $i5 killed $i4_i5 def $i4_i5
+; SPARC32-NEXT: ! kill: def $i3 killed $i3 killed $i2_i3 def $i2_i3
+; SPARC32-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 def $i0_i1
+; SPARC32-NEXT: std %g2, [%fp+-32]
+; SPARC32-NEXT: ! kill: def $i4 killed $i4 killed $i4_i5 def $i4_i5
+; SPARC32-NEXT: std %i4, [%fp+-24]
+; SPARC32-NEXT: ! kill: def $i2 killed $i2 killed $i2_i3 def $i2_i3
+; SPARC32-NEXT: std %i2, [%fp+-16]
+; SPARC32-NEXT: ! kill: def $i0 killed $i0 killed $i0_i1 def $i0_i1
+; SPARC32-NEXT: std %i0, [%fp+-8]
+; SPARC32-NEXT: ld [%fp+100], %i3
+; SPARC32-NEXT: ldd [%fp+-8], %f0
+; SPARC32-NEXT: ldd [%fp+-16], %f2
+; SPARC32-NEXT: ldd [%fp+-24], %f4
+; SPARC32-NEXT: ldd [%fp+-32], %f6
+; SPARC32-NEXT: std %f0, [%fp+-40]
+; SPARC32-NEXT: std %f2, [%fp+-48]
+; SPARC32-NEXT: std %f4, [%fp+-56]
+; SPARC32-NEXT: std %f6, [%fp+-64]
+; SPARC32-NEXT: call __truncdfhf2
+; SPARC32-NEXT: ldd [%fp+-40], %o0
+; SPARC32-NEXT: mov %o0, %i0
+; SPARC32-NEXT: call __truncdfhf2
+; SPARC32-NEXT: ldd [%fp+-48], %o0
+; SPARC32-NEXT: mov %o0, %i1
+; SPARC32-NEXT: call __truncdfhf2
+; SPARC32-NEXT: ldd [%fp+-56], %o0
+; SPARC32-NEXT: mov %o0, %i2
+; SPARC32-NEXT: call __truncdfhf2
+; SPARC32-NEXT: ldd [%fp+-64], %o0
+; SPARC32-NEXT: sth %o0, [%i3+6]
+; SPARC32-NEXT: sth %i2, [%i3+4]
+; SPARC32-NEXT: sth %i1, [%i3+2]
+; SPARC32-NEXT: sth %i0, [%i3]
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_trunc64_vec4:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -208, %sp
+; SPARC64-NEXT: std %f6, [%fp+2039] ! 8-byte Folded Spill
+; SPARC64-NEXT: std %f4, [%fp+2031] ! 8-byte Folded Spill
+; SPARC64-NEXT: call __truncdfhf2
+; SPARC64-NEXT: std %f2, [%fp+2023]
+; SPARC64-NEXT: mov %o0, %i0
+; SPARC64-NEXT: call __truncdfhf2
+; SPARC64-NEXT: ldd [%fp+2023], %f0
+; SPARC64-NEXT: mov %o0, %i1
+; SPARC64-NEXT: call __truncdfhf2
+; SPARC64-NEXT: ldd [%fp+2031], %f0
+; SPARC64-NEXT: mov %o0, %i2
+; SPARC64-NEXT: call __truncdfhf2
+; SPARC64-NEXT: ldd [%fp+2039], %f0
+; SPARC64-NEXT: sth %o0, [%i4+6]
+; SPARC64-NEXT: sth %i2, [%i4+4]
+; SPARC64-NEXT: sth %i1, [%i4+2]
+; SPARC64-NEXT: sth %i0, [%i4]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %v = fptrunc <4 x double> %a to <4 x half>
+ store <4 x half> %v, ptr %p
+ ret void
+}
+
+define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
+; SPARC32-LABEL: test_sitofp_fadd_i32:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -104, %sp
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: lduh [%i1], %o0
+; SPARC32-NEXT: st %i0, [%fp+-4]
+; SPARC32-NEXT: ld [%fp+-4], %f1
+; SPARC32-NEXT: st %f0, [%fp+-12] ! 4-byte Folded Spill
+; SPARC32-NEXT: fitos %f1, %f0
+; SPARC32-NEXT: st %f0, [%fp+-8]
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-8], %o0
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: nop
+; SPARC32-NEXT: ld [%fp+-12], %f1 ! 4-byte Folded Reload
+; SPARC32-NEXT: fadds %f1, %f0, %f0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: test_sitofp_fadd_i32:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: lduh [%i1], %o0
+; SPARC64-NEXT: st %f0, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: st %i0, [%fp+2043]
+; SPARC64-NEXT: ld [%fp+2043], %f0
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: fitos %f0, %f1
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ld [%fp+2039], %f1 ! 4-byte Folded Reload
+; SPARC64-NEXT: fadds %f1, %f0, %f0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %tmp0 = load half, ptr %b
+ %tmp1 = sitofp i32 %a to half
+ %tmp2 = fadd half %tmp0, %tmp1
+ %tmp3 = fpext half %tmp2 to float
+ ret float %tmp3
+}
+
+define half @PR40273(half) nounwind {
+; SPARC32-LABEL: PR40273:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: nop
+; SPARC32-NEXT: sethi %hi(.LCPI24_0), %i0
+; SPARC32-NEXT: ld [%i0+%lo(.LCPI24_0)], %f1
+; SPARC32-NEXT: fcmps %f0, %f1
+; SPARC32-NEXT: nop
+; SPARC32-NEXT: fbne .LBB24_2
+; SPARC32-NEXT: nop
+; SPARC32-NEXT: ! %bb.1:
+; SPARC32-NEXT: ba .LBB24_3
+; SPARC32-NEXT: mov %g0, %i0
+; SPARC32-NEXT: .LBB24_2:
+; SPARC32-NEXT: mov 4, %i0
+; SPARC32-NEXT: .LBB24_3:
+; SPARC32-NEXT: sethi %hi(.LCPI24_1), %i1
+; SPARC32-NEXT: add %i1, %lo(.LCPI24_1), %i1
+; SPARC32-NEXT: ld [%i1+%i0], %f0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: PR40273:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: sethi %h44(.LCPI24_0), %i0
+; SPARC64-NEXT: add %i0, %m44(.LCPI24_0), %i0
+; SPARC64-NEXT: sllx %i0, 12, %i0
+; SPARC64-NEXT: ld [%i0+%l44(.LCPI24_0)], %f1
+; SPARC64-NEXT: mov %g0, %i0
+; SPARC64-NEXT: fcmps %fcc0, %f0, %f1
+; SPARC64-NEXT: movne %fcc0, 4, %i0
+; SPARC64-NEXT: sethi %h44(.LCPI24_1), %i1
+; SPARC64-NEXT: add %i1, %m44(.LCPI24_1), %i1
+; SPARC64-NEXT: sllx %i1, 12, %i1
+; SPARC64-NEXT: add %i1, %l44(.LCPI24_1), %i1
+; SPARC64-NEXT: ld [%i1+%i0], %f0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %2 = fcmp une half %0, 0xH0000
+ %3 = uitofp i1 %2 to half
+ ret half %3
+}
+
+define half @fabs(half %x) nounwind {
+; SPARC32-LABEL: fabs:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: nop
+; SPARC32-NEXT: fabss %f0, %f0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: fabs:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -176, %sp
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: fabss %f0, %f0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %a = call half @llvm.fabs.f16(half %x)
+ ret half %a
+}
+
+define half @fcopysign(half %x, half %y) nounwind {
+; SPARC32-LABEL: fcopysign:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: nop
+; SPARC32-NEXT: sethi 2097152, %i0
+; SPARC32-NEXT: and %i1, %i0, %i0
+; SPARC32-NEXT: cmp %i0, 0
+; SPARC32-NEXT: be .LBB26_2
+; SPARC32-NEXT: fabss %f0, %f0
+; SPARC32-NEXT: ! %bb.1:
+; SPARC32-NEXT: fnegs %f0, %f0
+; SPARC32-NEXT: .LBB26_2:
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC64-LABEL: fcopysign:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: st %f3, [%fp+2039]
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ld [%fp+2039], %f1 ! 4-byte Folded Reload
+; SPARC64-NEXT: st %f1, [%fp+2043]
+; SPARC64-NEXT: ld [%fp+2043], %i0
+; SPARC64-NEXT: sethi 2097152, %i1
+; SPARC64-NEXT: and %i0, %i1, %i0
+; SPARC64-NEXT: fabss %f0, %f0
+; SPARC64-NEXT: fnegs %f0, %f1
+; SPARC64-NEXT: cmp %i0, 0
+; SPARC64-NEXT: fmovsne %icc, %f1, %f0
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore
+ %a = call half @llvm.copysign.f16(half %x, half %y)
+ ret half %a
+}
>From 9d1af5064d031f460bc0f8d4499bb15be43f6a69 Mon Sep 17 00:00:00 2001
From: Trevor Gross <tmgross at umich.edu>
Date: Wed, 6 Aug 2025 07:21:41 +0000
Subject: [PATCH 2/2] [SPARC] Change `half` to use soft promotion rather than
`PromoteFloat`
`half` currently uses the default legalization of promoting to an `f32`;
however, this legalization performs arithmetic in a way that results in
incorrect rounding. Switch to the soft promote implementation, which
does not have this problem.
The SPARC ABI does not specify a `_Float16` type, so there is no concern
with keeping interface compatibility.
Fixes the SPARC portion of [1].
[1]: https://github.com/llvm/llvm-project/issues/97975
---
llvm/lib/Target/Sparc/SparcISelLowering.h | 2 +
llvm/test/CodeGen/SPARC/fp16-promote.ll | 64 +++--
llvm/test/CodeGen/SPARC/half.ll | 185 ++++--------
llvm/test/CodeGen/SPARC/llvm.sincos.ll | 335 ++++++++++++----------
4 files changed, 286 insertions(+), 300 deletions(-)
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h
index 0d220f8c3d32e..3a6aaf929707d 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.h
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.h
@@ -28,6 +28,8 @@ namespace llvm {
bool useSoftFloat() const override;
+ bool softPromoteHalfType() const override { return true; }
+
/// computeKnownBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
diff --git a/llvm/test/CodeGen/SPARC/fp16-promote.ll b/llvm/test/CodeGen/SPARC/fp16-promote.ll
index efe67b04e8fb3..64873b744de50 100644
--- a/llvm/test/CodeGen/SPARC/fp16-promote.ll
+++ b/llvm/test/CodeGen/SPARC/fp16-promote.ll
@@ -329,13 +329,14 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; V8-OPT-LABEL: test_fadd:
; V8-OPT: ! %bb.0:
; V8-OPT-NEXT: save %sp, -104, %sp
+; V8-OPT-NEXT: lduh [%i0], %i2
; V8-OPT-NEXT: call __extendhfsf2
-; V8-OPT-NEXT: lduh [%i0], %o0
+; V8-OPT-NEXT: lduh [%i1], %o0
; V8-OPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
; V8-OPT-NEXT: call __extendhfsf2
-; V8-OPT-NEXT: lduh [%i1], %o0
+; V8-OPT-NEXT: mov %i2, %o0
; V8-OPT-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
-; V8-OPT-NEXT: fadds %f1, %f0, %f0
+; V8-OPT-NEXT: fadds %f0, %f1, %f0
; V8-OPT-NEXT: st %f0, [%fp+-4]
; V8-OPT-NEXT: call __truncsfhf2
; V8-OPT-NEXT: ld [%fp+-4], %o0
@@ -346,13 +347,14 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; V8-UNOPT-LABEL: test_fadd:
; V8-UNOPT: ! %bb.0:
; V8-UNOPT-NEXT: save %sp, -104, %sp
-; V8-UNOPT-NEXT: call __extendhfsf2
-; V8-UNOPT-NEXT: lduh [%i0], %o0
-; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
+; V8-UNOPT-NEXT: lduh [%i0], %i2
+; V8-UNOPT-NEXT: st %i2, [%fp+-12] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __extendhfsf2
; V8-UNOPT-NEXT: lduh [%i1], %o0
-; V8-UNOPT-NEXT: fmovs %f0, %f1
-; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: ld [%fp+-12], %o0 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: call __extendhfsf2
+; V8-UNOPT-NEXT: st %f0, [%fp+-8]
+; V8-UNOPT-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
; V8-UNOPT-NEXT: fadds %f0, %f1, %f0
; V8-UNOPT-NEXT: st %f0, [%fp+-4]
; V8-UNOPT-NEXT: call __truncsfhf2
@@ -364,13 +366,14 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; V9-LABEL: test_fadd:
; V9: ! %bb.0:
; V9-NEXT: save %sp, -104, %sp
+; V9-NEXT: lduh [%i0], %i2
; V9-NEXT: call __extendhfsf2
-; V9-NEXT: lduh [%i0], %o0
+; V9-NEXT: lduh [%i1], %o0
; V9-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
; V9-NEXT: call __extendhfsf2
-; V9-NEXT: lduh [%i1], %o0
+; V9-NEXT: mov %i2, %o0
; V9-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
-; V9-NEXT: fadds %f1, %f0, %f0
+; V9-NEXT: fadds %f0, %f1, %f0
; V9-NEXT: st %f0, [%fp+-4]
; V9-NEXT: call __truncsfhf2
; V9-NEXT: ld [%fp+-4], %o0
@@ -381,14 +384,15 @@ define void @test_fadd(ptr %p, ptr %q) nounwind {
; SPARC64-LABEL: test_fadd:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: lduh [%i0], %i2
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: lduh [%i1], %o0
; SPARC64-NEXT: st %f0, [%fp+2043] ! 4-byte Folded Spill
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: lduh [%i1], %o0
+; SPARC64-NEXT: mov %i2, %o0
; SPARC64-NEXT: ld [%fp+2043], %f1 ! 4-byte Folded Reload
; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: fadds %f1, %f0, %f1
+; SPARC64-NEXT: fadds %f0, %f1, %f1
; SPARC64-NEXT: sth %o0, [%i0]
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore
@@ -403,13 +407,14 @@ define void @test_fmul(ptr %p, ptr %q) nounwind {
; V8-OPT-LABEL: test_fmul:
; V8-OPT: ! %bb.0:
; V8-OPT-NEXT: save %sp, -104, %sp
+; V8-OPT-NEXT: lduh [%i0], %i2
; V8-OPT-NEXT: call __extendhfsf2
-; V8-OPT-NEXT: lduh [%i0], %o0
+; V8-OPT-NEXT: lduh [%i1], %o0
; V8-OPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
; V8-OPT-NEXT: call __extendhfsf2
-; V8-OPT-NEXT: lduh [%i1], %o0
+; V8-OPT-NEXT: mov %i2, %o0
; V8-OPT-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
-; V8-OPT-NEXT: fmuls %f1, %f0, %f0
+; V8-OPT-NEXT: fmuls %f0, %f1, %f0
; V8-OPT-NEXT: st %f0, [%fp+-4]
; V8-OPT-NEXT: call __truncsfhf2
; V8-OPT-NEXT: ld [%fp+-4], %o0
@@ -420,13 +425,14 @@ define void @test_fmul(ptr %p, ptr %q) nounwind {
; V8-UNOPT-LABEL: test_fmul:
; V8-UNOPT: ! %bb.0:
; V8-UNOPT-NEXT: save %sp, -104, %sp
-; V8-UNOPT-NEXT: call __extendhfsf2
-; V8-UNOPT-NEXT: lduh [%i0], %o0
-; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
+; V8-UNOPT-NEXT: lduh [%i0], %i2
+; V8-UNOPT-NEXT: st %i2, [%fp+-12] ! 4-byte Folded Spill
; V8-UNOPT-NEXT: call __extendhfsf2
; V8-UNOPT-NEXT: lduh [%i1], %o0
-; V8-UNOPT-NEXT: fmovs %f0, %f1
-; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: ld [%fp+-12], %o0 ! 4-byte Folded Reload
+; V8-UNOPT-NEXT: call __extendhfsf2
+; V8-UNOPT-NEXT: st %f0, [%fp+-8]
+; V8-UNOPT-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
; V8-UNOPT-NEXT: fmuls %f0, %f1, %f0
; V8-UNOPT-NEXT: st %f0, [%fp+-4]
; V8-UNOPT-NEXT: call __truncsfhf2
@@ -438,13 +444,14 @@ define void @test_fmul(ptr %p, ptr %q) nounwind {
; V9-LABEL: test_fmul:
; V9: ! %bb.0:
; V9-NEXT: save %sp, -104, %sp
+; V9-NEXT: lduh [%i0], %i2
; V9-NEXT: call __extendhfsf2
-; V9-NEXT: lduh [%i0], %o0
+; V9-NEXT: lduh [%i1], %o0
; V9-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
; V9-NEXT: call __extendhfsf2
-; V9-NEXT: lduh [%i1], %o0
+; V9-NEXT: mov %i2, %o0
; V9-NEXT: ld [%fp+-8], %f1 ! 4-byte Folded Reload
-; V9-NEXT: fmuls %f1, %f0, %f0
+; V9-NEXT: fmuls %f0, %f1, %f0
; V9-NEXT: st %f0, [%fp+-4]
; V9-NEXT: call __truncsfhf2
; V9-NEXT: ld [%fp+-4], %o0
@@ -455,14 +462,15 @@ define void @test_fmul(ptr %p, ptr %q) nounwind {
; SPARC64-LABEL: test_fmul:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -192, %sp
+; SPARC64-NEXT: lduh [%i0], %i2
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: lduh [%i0], %o0
+; SPARC64-NEXT: lduh [%i1], %o0
; SPARC64-NEXT: st %f0, [%fp+2043] ! 4-byte Folded Spill
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: lduh [%i1], %o0
+; SPARC64-NEXT: mov %i2, %o0
; SPARC64-NEXT: ld [%fp+2043], %f1 ! 4-byte Folded Reload
; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: fmuls %f1, %f0, %f1
+; SPARC64-NEXT: fmuls %f0, %f1, %f1
; SPARC64-NEXT: sth %o0, [%i0]
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore
diff --git a/llvm/test/CodeGen/SPARC/half.ll b/llvm/test/CodeGen/SPARC/half.ll
index e1e0c7cbd24e9..5db5176fe39cd 100644
--- a/llvm/test/CodeGen/SPARC/half.ll
+++ b/llvm/test/CodeGen/SPARC/half.ll
@@ -9,21 +9,13 @@
define void @store(half %x, ptr %p) nounwind {
; SPARC32-LABEL: store:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: sth %o0, [%i1]
-; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: sth %o0, [%o1]
;
; SPARC64-LABEL: store:
; SPARC64: ! %bb.0:
-; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
-; SPARC64-NEXT: sth %o0, [%i1]
-; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: sth %o0, [%o1]
store half %x, ptr %p
ret void
}
@@ -31,19 +23,13 @@ define void @store(half %x, ptr %p) nounwind {
define half @return(ptr %p) nounwind {
; SPARC32-LABEL: return:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: lduh [%i0], %o0
-; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: lduh [%o0], %o0
;
; SPARC64-LABEL: return:
; SPARC64: ! %bb.0:
-; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: lduh [%i0], %o0
-; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: lduh [%o0], %o0
%r = load half, ptr %p
ret half %r
}
@@ -201,19 +187,13 @@ define void @test_bitcast_to_half(ptr %addr, i16 %in) nounwind {
define half @from_bits(i16 %x) nounwind {
; SPARC32-LABEL: from_bits:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: nop
;
; SPARC64-LABEL: from_bits:
; SPARC64: ! %bb.0:
-; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: srl %i0, 0, %o0
-; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
%res = bitcast i16 %x to half
ret half %res
}
@@ -221,24 +201,13 @@ define half @from_bits(i16 %x) nounwind {
define i16 @to_bits(half %x) nounwind {
; SPARC32-LABEL: to_bits:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: sethi 4194240, %i0
-; SPARC32-NEXT: andn %o0, %i0, %i0
-; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: nop
;
; SPARC64-LABEL: to_bits:
; SPARC64: ! %bb.0:
-; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: retl
; SPARC64-NEXT: nop
-; SPARC64-NEXT: sethi 63, %i0
-; SPARC64-NEXT: or %i0, 1023, %i0
-; SPARC64-NEXT: and %o0, %i0, %i0
-; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
%res = bitcast half %x to i16
ret i16 %res
}
@@ -708,37 +677,47 @@ define void @test_trunc64_vec4(<4 x double> %a, ptr %p) nounwind {
define float @test_sitofp_fadd_i32(i32 %a, ptr %b) nounwind {
; SPARC32-LABEL: test_sitofp_fadd_i32:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -104, %sp
-; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: lduh [%i1], %o0
+; SPARC32-NEXT: save %sp, -112, %sp
+; SPARC32-NEXT: lduh [%i1], %i1
; SPARC32-NEXT: st %i0, [%fp+-4]
-; SPARC32-NEXT: ld [%fp+-4], %f1
-; SPARC32-NEXT: st %f0, [%fp+-12] ! 4-byte Folded Spill
-; SPARC32-NEXT: fitos %f1, %f0
+; SPARC32-NEXT: ld [%fp+-4], %f0
+; SPARC32-NEXT: fitos %f0, %f0
; SPARC32-NEXT: st %f0, [%fp+-8]
; SPARC32-NEXT: call __truncsfhf2
; SPARC32-NEXT: ld [%fp+-8], %o0
; SPARC32-NEXT: call __extendhfsf2
; SPARC32-NEXT: nop
-; SPARC32-NEXT: ld [%fp+-12], %f1 ! 4-byte Folded Reload
-; SPARC32-NEXT: fadds %f1, %f0, %f0
+; SPARC32-NEXT: st %f0, [%fp+-16] ! 4-byte Folded Spill
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: mov %i1, %o0
+; SPARC32-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload
+; SPARC32-NEXT: fadds %f0, %f1, %f0
+; SPARC32-NEXT: st %f0, [%fp+-12]
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-12], %o0
+; SPARC32-NEXT: call __extendhfsf2
+; SPARC32-NEXT: nop
; SPARC32-NEXT: ret
; SPARC32-NEXT: restore
;
; SPARC64-LABEL: test_sitofp_fadd_i32:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -192, %sp
-; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: lduh [%i1], %o0
-; SPARC64-NEXT: st %f0, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: lduh [%i1], %i1
; SPARC64-NEXT: st %i0, [%fp+2043]
; SPARC64-NEXT: ld [%fp+2043], %f0
; SPARC64-NEXT: call __truncsfhf2
; SPARC64-NEXT: fitos %f0, %f1
; SPARC64-NEXT: call __extendhfsf2
; SPARC64-NEXT: nop
+; SPARC64-NEXT: st %f0, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: mov %i1, %o0
; SPARC64-NEXT: ld [%fp+2039], %f1 ! 4-byte Folded Reload
-; SPARC64-NEXT: fadds %f1, %f0, %f0
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: fadds %f0, %f1, %f1
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: nop
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore
%tmp0 = load half, ptr %b
@@ -752,10 +731,8 @@ define half @PR40273(half) nounwind {
; SPARC32-LABEL: PR40273:
; SPARC32: ! %bb.0:
; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
+; SPARC32-NEXT: mov %i0, %o0
; SPARC32-NEXT: sethi %hi(.LCPI24_0), %i0
; SPARC32-NEXT: ld [%i0+%lo(.LCPI24_0)], %f1
; SPARC32-NEXT: fcmps %f0, %f1
@@ -763,36 +740,26 @@ define half @PR40273(half) nounwind {
; SPARC32-NEXT: fbne .LBB24_2
; SPARC32-NEXT: nop
; SPARC32-NEXT: ! %bb.1:
-; SPARC32-NEXT: ba .LBB24_3
-; SPARC32-NEXT: mov %g0, %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %g0, %o0
; SPARC32-NEXT: .LBB24_2:
-; SPARC32-NEXT: mov 4, %i0
-; SPARC32-NEXT: .LBB24_3:
-; SPARC32-NEXT: sethi %hi(.LCPI24_1), %i1
-; SPARC32-NEXT: add %i1, %lo(.LCPI24_1), %i1
-; SPARC32-NEXT: ld [%i1+%i0], %f0
+; SPARC32-NEXT: sethi 15, %i0
; SPARC32-NEXT: ret
; SPARC32-NEXT: restore
;
; SPARC64-LABEL: PR40273:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
+; SPARC64-NEXT: srl %i0, 0, %o0
; SPARC64-NEXT: sethi %h44(.LCPI24_0), %i0
; SPARC64-NEXT: add %i0, %m44(.LCPI24_0), %i0
; SPARC64-NEXT: sllx %i0, 12, %i0
; SPARC64-NEXT: ld [%i0+%l44(.LCPI24_0)], %f1
; SPARC64-NEXT: mov %g0, %i0
+; SPARC64-NEXT: sethi 15, %i1
; SPARC64-NEXT: fcmps %fcc0, %f0, %f1
-; SPARC64-NEXT: movne %fcc0, 4, %i0
-; SPARC64-NEXT: sethi %h44(.LCPI24_1), %i1
-; SPARC64-NEXT: add %i1, %m44(.LCPI24_1), %i1
-; SPARC64-NEXT: sllx %i1, 12, %i1
-; SPARC64-NEXT: add %i1, %l44(.LCPI24_1), %i1
-; SPARC64-NEXT: ld [%i1+%i0], %f0
+; SPARC64-NEXT: movne %fcc0, %i1, %i0
; SPARC64-NEXT: ret
; SPARC64-NEXT: restore
%2 = fcmp une half %0, 0xH0000
@@ -803,25 +770,16 @@ define half @PR40273(half) nounwind {
define half @fabs(half %x) nounwind {
; SPARC32-LABEL: fabs:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: fabss %f0, %f0
-; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: sethi 4194272, %o1
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: andn %o0, %o1, %o0
;
; SPARC64-LABEL: fabs:
; SPARC64: ! %bb.0:
-; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
-; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
-; SPARC64-NEXT: fabss %f0, %f0
-; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: sethi 31, %o1
+; SPARC64-NEXT: or %o1, 1023, %o1
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: and %o0, %o1, %o0
%a = call half @llvm.fabs.f16(half %x)
ret half %a
}
@@ -829,40 +787,19 @@ define half @fabs(half %x) nounwind {
define half @fcopysign(half %x, half %y) nounwind {
; SPARC32-LABEL: fcopysign:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: sethi 2097152, %i0
-; SPARC32-NEXT: and %i1, %i0, %i0
-; SPARC32-NEXT: cmp %i0, 0
-; SPARC32-NEXT: be .LBB26_2
-; SPARC32-NEXT: fabss %f0, %f0
-; SPARC32-NEXT: ! %bb.1:
-; SPARC32-NEXT: fnegs %f0, %f0
-; SPARC32-NEXT: .LBB26_2:
-; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: sethi 4194272, %o2
+; SPARC32-NEXT: and %o1, %o2, %o1
+; SPARC32-NEXT: andn %o0, %o2, %o0
+; SPARC32-NEXT: retl
+; SPARC32-NEXT: or %o0, %o1, %o0
;
; SPARC64-LABEL: fcopysign:
; SPARC64: ! %bb.0:
-; SPARC64-NEXT: save %sp, -192, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: st %f3, [%fp+2039]
-; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
-; SPARC64-NEXT: ld [%fp+2039], %f1 ! 4-byte Folded Reload
-; SPARC64-NEXT: st %f1, [%fp+2043]
-; SPARC64-NEXT: ld [%fp+2043], %i0
-; SPARC64-NEXT: sethi 2097152, %i1
-; SPARC64-NEXT: and %i0, %i1, %i0
-; SPARC64-NEXT: fabss %f0, %f0
-; SPARC64-NEXT: fnegs %f0, %f1
-; SPARC64-NEXT: cmp %i0, 0
-; SPARC64-NEXT: fmovsne %icc, %f1, %f0
-; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: sethi 4194272, %o2
+; SPARC64-NEXT: and %o1, %o2, %o1
+; SPARC64-NEXT: andn %o0, %o2, %o0
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: or %o0, %o1, %o0
%a = call half @llvm.copysign.f16(half %x, half %y)
ret half %a
}
diff --git a/llvm/test/CodeGen/SPARC/llvm.sincos.ll b/llvm/test/CodeGen/SPARC/llvm.sincos.ll
index 87b9c8e7ba47b..8d0d50f67e3f5 100644
--- a/llvm/test/CodeGen/SPARC/llvm.sincos.ll
+++ b/llvm/test/CodeGen/SPARC/llvm.sincos.ll
@@ -10,74 +10,84 @@ define { half, half } @test_sincos_f16(half %a) #0 {
; SPARC32-LABEL: test_sincos_f16:
; SPARC32: ! %bb.0:
; SPARC32-NEXT: save %sp, -104, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: st %f0, [%fp+-4]
-; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %f0, [%fp+-12]
+; SPARC32-NEXT: ld [%fp+-12], %i0
; SPARC32-NEXT: call sinf
; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill
+; SPARC32-NEXT: st %f0, [%fp+-8]
; SPARC32-NEXT: call cosf
; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: fmovs %f0, %f1
-; SPARC32-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-8], %o0
+; SPARC32-NEXT: mov %o0, %i0
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-4], %o0
; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: restore %g0, %o0, %o1
;
; SPARC64-LABEL: test_sincos_f16:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -192, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
-; SPARC64-NEXT: st %f0, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: srl %i0, 0, %o0
+; SPARC64-NEXT: st %f0, [%fp+2043] ! 4-byte Folded Spill
; SPARC64-NEXT: fmovs %f0, %f1
; SPARC64-NEXT: call sinf
; SPARC64-NEXT: nop
-; SPARC64-NEXT: st %f0, [%fp+2043] ! 4-byte Folded Spill
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: mov %o0, %i0
; SPARC64-NEXT: call cosf
-; SPARC64-NEXT: ld [%fp+2039], %f1
+; SPARC64-NEXT: ld [%fp+2043], %f1
; SPARC64-NEXT: fmovs %f0, %f1
-; SPARC64-NEXT: ld [%fp+2043], %f0 ! 4-byte Folded Reload
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: restore %g0, %o0, %o1
;
; GNU32-LABEL: test_sincos_f16:
; GNU32: ! %bb.0:
-; GNU32-NEXT: save %sp, -104, %sp
-; GNU32-NEXT: call __truncsfhf2
-; GNU32-NEXT: mov %i0, %o0
+; GNU32-NEXT: save %sp, -112, %sp
; GNU32-NEXT: call __extendhfsf2
-; GNU32-NEXT: nop
+; GNU32-NEXT: mov %i0, %o0
; GNU32-NEXT: st %f0, [%fp+-12]
; GNU32-NEXT: ld [%fp+-12], %o0
; GNU32-NEXT: add %fp, -4, %o1
; GNU32-NEXT: call sincosf
; GNU32-NEXT: add %fp, -8, %o2
; GNU32-NEXT: ld [%fp+-4], %f0
-; GNU32-NEXT: ld [%fp+-8], %f1
+; GNU32-NEXT: st %f0, [%fp+-20]
+; GNU32-NEXT: ld [%fp+-8], %f0
+; GNU32-NEXT: st %f0, [%fp+-16]
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-20], %o0
+; GNU32-NEXT: mov %o0, %i0
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-16], %o0
; GNU32-NEXT: ret
-; GNU32-NEXT: restore
+; GNU32-NEXT: restore %g0, %o0, %o1
;
; GNU64-LABEL: test_sincos_f16:
; GNU64: ! %bb.0:
; GNU64-NEXT: save %sp, -192, %sp
-; GNU64-NEXT: call __truncsfhf2
-; GNU64-NEXT: nop
; GNU64-NEXT: call __extendhfsf2
-; GNU64-NEXT: nop
+; GNU64-NEXT: srl %i0, 0, %o0
; GNU64-NEXT: add %fp, 2043, %o1
; GNU64-NEXT: add %fp, 2039, %o2
; GNU64-NEXT: fmovs %f0, %f1
; GNU64-NEXT: call sincosf
; GNU64-NEXT: nop
-; GNU64-NEXT: ld [%fp+2043], %f0
+; GNU64-NEXT: call __truncsfhf2
+; GNU64-NEXT: ld [%fp+2043], %f1
+; GNU64-NEXT: mov %o0, %i0
+; GNU64-NEXT: call __truncsfhf2
; GNU64-NEXT: ld [%fp+2039], %f1
; GNU64-NEXT: ret
-; GNU64-NEXT: restore
+; GNU64-NEXT: restore %g0, %o0, %o1
%result = call { half, half } @llvm.sincos.f16(half %a)
ret { half, half } %result
}
@@ -85,61 +95,63 @@ define { half, half } @test_sincos_f16(half %a) #0 {
define half @test_sincos_f16_only_use_sin(half %a) #0 {
; SPARC32-LABEL: test_sincos_f16_only_use_sin:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: save %sp, -104, %sp
; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %f0, [%fp+-8]
; SPARC32-NEXT: call sinf
+; SPARC32-NEXT: ld [%fp+-8], %o0
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __truncsfhf2
; SPARC32-NEXT: ld [%fp+-4], %o0
; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: restore %g0, %o0, %o0
;
; SPARC64-LABEL: test_sincos_f16_only_use_sin:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
+; SPARC64-NEXT: srl %i0, 0, %o0
; SPARC64-NEXT: fmovs %f0, %f1
; SPARC64-NEXT: call sinf
; SPARC64-NEXT: nop
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: restore %g0, %o0, %o0
;
; GNU32-LABEL: test_sincos_f16_only_use_sin:
; GNU32: ! %bb.0:
-; GNU32-NEXT: save %sp, -104, %sp
-; GNU32-NEXT: call __truncsfhf2
-; GNU32-NEXT: mov %i0, %o0
+; GNU32-NEXT: save %sp, -112, %sp
; GNU32-NEXT: call __extendhfsf2
-; GNU32-NEXT: nop
+; GNU32-NEXT: mov %i0, %o0
; GNU32-NEXT: st %f0, [%fp+-12]
; GNU32-NEXT: ld [%fp+-12], %o0
; GNU32-NEXT: add %fp, -4, %o1
; GNU32-NEXT: call sincosf
; GNU32-NEXT: add %fp, -8, %o2
; GNU32-NEXT: ld [%fp+-4], %f0
+; GNU32-NEXT: st %f0, [%fp+-16]
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-16], %o0
; GNU32-NEXT: ret
-; GNU32-NEXT: restore
+; GNU32-NEXT: restore %g0, %o0, %o0
;
; GNU64-LABEL: test_sincos_f16_only_use_sin:
; GNU64: ! %bb.0:
; GNU64-NEXT: save %sp, -192, %sp
-; GNU64-NEXT: call __truncsfhf2
-; GNU64-NEXT: nop
; GNU64-NEXT: call __extendhfsf2
-; GNU64-NEXT: nop
+; GNU64-NEXT: srl %i0, 0, %o0
; GNU64-NEXT: add %fp, 2043, %o1
; GNU64-NEXT: add %fp, 2039, %o2
; GNU64-NEXT: fmovs %f0, %f1
; GNU64-NEXT: call sincosf
; GNU64-NEXT: nop
-; GNU64-NEXT: ld [%fp+2043], %f0
+; GNU64-NEXT: call __truncsfhf2
+; GNU64-NEXT: ld [%fp+2043], %f1
; GNU64-NEXT: ret
-; GNU64-NEXT: restore
+; GNU64-NEXT: restore %g0, %o0, %o0
%result = call { half, half } @llvm.sincos.f16(half %a)
%result.0 = extractvalue { half, half } %result, 0
ret half %result.0
@@ -148,61 +160,63 @@ define half @test_sincos_f16_only_use_sin(half %a) #0 {
define half @test_sincos_f16_only_use_cos(half %a) #0 {
; SPARC32-LABEL: test_sincos_f16_only_use_cos:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -96, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: save %sp, -104, %sp
; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %f0, [%fp+-8]
; SPARC32-NEXT: call cosf
+; SPARC32-NEXT: ld [%fp+-8], %o0
+; SPARC32-NEXT: st %f0, [%fp+-4]
+; SPARC32-NEXT: call __truncsfhf2
; SPARC32-NEXT: ld [%fp+-4], %o0
; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: restore %g0, %o0, %o0
;
; SPARC64-LABEL: test_sincos_f16_only_use_cos:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -176, %sp
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
+; SPARC64-NEXT: srl %i0, 0, %o0
; SPARC64-NEXT: fmovs %f0, %f1
; SPARC64-NEXT: call cosf
; SPARC64-NEXT: nop
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: restore %g0, %o0, %o0
;
; GNU32-LABEL: test_sincos_f16_only_use_cos:
; GNU32: ! %bb.0:
-; GNU32-NEXT: save %sp, -104, %sp
-; GNU32-NEXT: call __truncsfhf2
-; GNU32-NEXT: mov %i0, %o0
+; GNU32-NEXT: save %sp, -112, %sp
; GNU32-NEXT: call __extendhfsf2
-; GNU32-NEXT: nop
+; GNU32-NEXT: mov %i0, %o0
; GNU32-NEXT: st %f0, [%fp+-12]
; GNU32-NEXT: ld [%fp+-12], %o0
; GNU32-NEXT: add %fp, -4, %o1
; GNU32-NEXT: call sincosf
; GNU32-NEXT: add %fp, -8, %o2
; GNU32-NEXT: ld [%fp+-8], %f0
+; GNU32-NEXT: st %f0, [%fp+-16]
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-16], %o0
; GNU32-NEXT: ret
-; GNU32-NEXT: restore
+; GNU32-NEXT: restore %g0, %o0, %o0
;
; GNU64-LABEL: test_sincos_f16_only_use_cos:
; GNU64: ! %bb.0:
; GNU64-NEXT: save %sp, -192, %sp
-; GNU64-NEXT: call __truncsfhf2
-; GNU64-NEXT: nop
; GNU64-NEXT: call __extendhfsf2
-; GNU64-NEXT: nop
+; GNU64-NEXT: srl %i0, 0, %o0
; GNU64-NEXT: add %fp, 2043, %o1
; GNU64-NEXT: add %fp, 2039, %o2
; GNU64-NEXT: fmovs %f0, %f1
; GNU64-NEXT: call sincosf
; GNU64-NEXT: nop
-; GNU64-NEXT: ld [%fp+2039], %f0
+; GNU64-NEXT: call __truncsfhf2
+; GNU64-NEXT: ld [%fp+2039], %f1
; GNU64-NEXT: ret
-; GNU64-NEXT: restore
+; GNU64-NEXT: restore %g0, %o0, %o0
%result = call { half, half } @llvm.sincos.f16(half %a)
%result.1 = extractvalue { half, half } %result, 1
ret half %result.1
@@ -211,132 +225,157 @@ define half @test_sincos_f16_only_use_cos(half %a) #0 {
define { <2 x half>, <2 x half> } @test_sincos_v2f16(<2 x half> %a) #0 {
; SPARC32-LABEL: test_sincos_v2f16:
; SPARC32: ! %bb.0:
-; SPARC32-NEXT: save %sp, -112, %sp
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i1, %o0
+; SPARC32-NEXT: save %sp, -128, %sp
; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: st %f0, [%fp+-12] ! 4-byte Folded Spill
-; SPARC32-NEXT: call __truncsfhf2
-; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: mov %i1, %o0
+; SPARC32-NEXT: st %f0, [%fp+-28]
; SPARC32-NEXT: call __extendhfsf2
-; SPARC32-NEXT: nop
-; SPARC32-NEXT: st %f0, [%fp+-8]
-; SPARC32-NEXT: ld [%fp+-12], %f0 ! 4-byte Folded Reload
-; SPARC32-NEXT: st %f0, [%fp+-4]
-; SPARC32-NEXT: ld [%fp+-8], %i0
-; SPARC32-NEXT: call sinf
; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: st %f0, [%fp+-12] ! 4-byte Folded Spill
-; SPARC32-NEXT: ld [%fp+-4], %i1
-; SPARC32-NEXT: call sinf
-; SPARC32-NEXT: mov %i1, %o0
-; SPARC32-NEXT: st %f0, [%fp+-16] ! 4-byte Folded Spill
+; SPARC32-NEXT: st %f0, [%fp+-32]
+; SPARC32-NEXT: ld [%fp+-28], %i0
; SPARC32-NEXT: call cosf
; SPARC32-NEXT: mov %i0, %o0
-; SPARC32-NEXT: st %f0, [%fp+-20] ! 4-byte Folded Spill
+; SPARC32-NEXT: st %f0, [%fp+-20]
+; SPARC32-NEXT: ld [%fp+-32], %i1
; SPARC32-NEXT: call cosf
; SPARC32-NEXT: mov %i1, %o0
-; SPARC32-NEXT: fmovs %f0, %f3
-; SPARC32-NEXT: ld [%fp+-12], %f0 ! 4-byte Folded Reload
-; SPARC32-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload
-; SPARC32-NEXT: ld [%fp+-20], %f2 ! 4-byte Folded Reload
+; SPARC32-NEXT: st %f0, [%fp+-12]
+; SPARC32-NEXT: call sinf
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %f0, [%fp+-24]
+; SPARC32-NEXT: call sinf
+; SPARC32-NEXT: mov %i1, %o0
+; SPARC32-NEXT: st %f0, [%fp+-16]
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-20], %o0
+; SPARC32-NEXT: sethi 63, %i0
+; SPARC32-NEXT: or %i0, 1023, %i0
+; SPARC32-NEXT: and %o0, %i0, %i4
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-12], %o0
+; SPARC32-NEXT: and %o0, %i0, %i2
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-24], %o0
+; SPARC32-NEXT: and %o0, %i0, %i1
+; SPARC32-NEXT: call __truncsfhf2
+; SPARC32-NEXT: ld [%fp+-16], %o0
+; SPARC32-NEXT: and %o0, %i0, %g2
+; SPARC32-NEXT: mov %g2, %i0
+; SPARC32-NEXT: ! kill: def $i2 killed $i2 killed $i2_i3
; SPARC32-NEXT: ret
-; SPARC32-NEXT: restore
+; SPARC32-NEXT: restore %g0, %i4, %o3
;
; SPARC64-LABEL: test_sincos_v2f16:
; SPARC64: ! %bb.0:
; SPARC64-NEXT: save %sp, -192, %sp
-; SPARC64-NEXT: st %f1, [%fp+2039] ! 4-byte Folded Spill
-; SPARC64-NEXT: fmovs %f3, %f1
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: nop
; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
+; SPARC64-NEXT: srl %i0, 0, %o0
; SPARC64-NEXT: st %f0, [%fp+2043] ! 4-byte Folded Spill
-; SPARC64-NEXT: call __truncsfhf2
-; SPARC64-NEXT: ld [%fp+2039], %f1
-; SPARC64-NEXT: call __extendhfsf2
-; SPARC64-NEXT: nop
-; SPARC64-NEXT: st %f0, [%fp+2031] ! 4-byte Folded Spill
; SPARC64-NEXT: fmovs %f0, %f1
; SPARC64-NEXT: call sinf
; SPARC64-NEXT: nop
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: mov %o0, %i0
+; SPARC64-NEXT: call __extendhfsf2
+; SPARC64-NEXT: srl %i1, 0, %o0
; SPARC64-NEXT: st %f0, [%fp+2039] ! 4-byte Folded Spill
+; SPARC64-NEXT: fmovs %f0, %f1
; SPARC64-NEXT: call sinf
-; SPARC64-NEXT: ld [%fp+2043], %f1
-; SPARC64-NEXT: st %f0, [%fp+2035] ! 4-byte Folded Spill
-; SPARC64-NEXT: call cosf
-; SPARC64-NEXT: ld [%fp+2031], %f1
-; SPARC64-NEXT: st %f0, [%fp+2031] ! 4-byte Folded Spill
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: mov %o0, %i1
; SPARC64-NEXT: call cosf
; SPARC64-NEXT: ld [%fp+2043], %f1
-; SPARC64-NEXT: fmovs %f0, %f3
-; SPARC64-NEXT: ld [%fp+2039], %f0 ! 4-byte Folded Reload
-; SPARC64-NEXT: ld [%fp+2035], %f1 ! 4-byte Folded Reload
-; SPARC64-NEXT: ld [%fp+2031], %f2 ! 4-byte Folded Reload
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: mov %o0, %i2
+; SPARC64-NEXT: call cosf
+; SPARC64-NEXT: ld [%fp+2039], %f1
+; SPARC64-NEXT: fmovs %f0, %f1
+; SPARC64-NEXT: call __truncsfhf2
+; SPARC64-NEXT: nop
; SPARC64-NEXT: ret
-; SPARC64-NEXT: restore
+; SPARC64-NEXT: restore %g0, %o0, %o3
;
; GNU32-LABEL: test_sincos_v2f16:
; GNU32: ! %bb.0:
-; GNU32-NEXT: save %sp, -120, %sp
-; GNU32-NEXT: call __truncsfhf2
-; GNU32-NEXT: mov %i1, %o0
-; GNU32-NEXT: call __extendhfsf2
-; GNU32-NEXT: nop
-; GNU32-NEXT: st %f0, [%fp+-28] ! 4-byte Folded Spill
-; GNU32-NEXT: call __truncsfhf2
-; GNU32-NEXT: mov %i0, %o0
+; GNU32-NEXT: save %sp, -144, %sp
; GNU32-NEXT: call __extendhfsf2
-; GNU32-NEXT: nop
-; GNU32-NEXT: st %f0, [%fp+-20]
-; GNU32-NEXT: ld [%fp+-20], %o0
+; GNU32-NEXT: mov %i1, %o0
+; GNU32-NEXT: st %f0, [%fp+-32]
+; GNU32-NEXT: ld [%fp+-32], %o0
; GNU32-NEXT: add %fp, -12, %o1
; GNU32-NEXT: call sincosf
; GNU32-NEXT: add %fp, -16, %o2
-; GNU32-NEXT: ld [%fp+-28], %f0 ! 4-byte Folded Reload
-; GNU32-NEXT: st %f0, [%fp+-24]
-; GNU32-NEXT: ld [%fp+-24], %o0
-; GNU32-NEXT: add %fp, -4, %o1
+; GNU32-NEXT: call __extendhfsf2
+; GNU32-NEXT: mov %i0, %o0
+; GNU32-NEXT: st %f0, [%fp+-28]
+; GNU32-NEXT: ld [%fp+-28], %o0
+; GNU32-NEXT: add %fp, -20, %o1
; GNU32-NEXT: call sincosf
-; GNU32-NEXT: add %fp, -8, %o2
+; GNU32-NEXT: add %fp, -24, %o2
+; GNU32-NEXT: ld [%fp+-16], %f0
+; GNU32-NEXT: st %f0, [%fp+-44]
+; GNU32-NEXT: ld [%fp+-24], %f0
+; GNU32-NEXT: st %f0, [%fp+-36]
; GNU32-NEXT: ld [%fp+-12], %f0
-; GNU32-NEXT: ld [%fp+-4], %f1
-; GNU32-NEXT: ld [%fp+-16], %f2
-; GNU32-NEXT: ld [%fp+-8], %f3
+; GNU32-NEXT: st %f0, [%fp+-48]
+; GNU32-NEXT: ld [%fp+-20], %f0
+; GNU32-NEXT: st %f0, [%fp+-40]
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-44], %o0
+; GNU32-NEXT: sethi 63, %i0
+; GNU32-NEXT: or %i0, 1023, %i0
+; GNU32-NEXT: and %o0, %i0, %i4
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-36], %o0
+; GNU32-NEXT: and %o0, %i0, %i2
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-48], %o0
+; GNU32-NEXT: and %o0, %i0, %i1
+; GNU32-NEXT: call __truncsfhf2
+; GNU32-NEXT: ld [%fp+-40], %o0
+; GNU32-NEXT: and %o0, %i0, %g2
+; GNU32-NEXT: mov %g2, %i0
+; GNU32-NEXT: ! kill: def $i2 killed $i2 killed $i2_i3
; GNU32-NEXT: ret
-; GNU32-NEXT: restore
+; GNU32-NEXT: restore %g0, %i4, %o3
;
; GNU64-LABEL: test_sincos_v2f16:
; GNU64: ! %bb.0:
-; GNU64-NEXT: save %sp, -208, %sp
-; GNU64-NEXT: st %f1, [%fp+2023] ! 4-byte Folded Spill
-; GNU64-NEXT: fmovs %f3, %f1
-; GNU64-NEXT: call __truncsfhf2
-; GNU64-NEXT: nop
-; GNU64-NEXT: call __extendhfsf2
-; GNU64-NEXT: nop
-; GNU64-NEXT: st %f0, [%fp+2027] ! 4-byte Folded Spill
-; GNU64-NEXT: call __truncsfhf2
-; GNU64-NEXT: ld [%fp+2023], %f1
+; GNU64-NEXT: save %sp, -192, %sp
; GNU64-NEXT: call __extendhfsf2
-; GNU64-NEXT: nop
+; GNU64-NEXT: srl %i0, 0, %o0
; GNU64-NEXT: add %fp, 2035, %o1
; GNU64-NEXT: add %fp, 2031, %o2
; GNU64-NEXT: fmovs %f0, %f1
; GNU64-NEXT: call sincosf
; GNU64-NEXT: nop
+; GNU64-NEXT: call __extendhfsf2
+; GNU64-NEXT: srl %i1, 0, %o0
; GNU64-NEXT: add %fp, 2043, %o1
; GNU64-NEXT: add %fp, 2039, %o2
+; GNU64-NEXT: fmovs %f0, %f1
; GNU64-NEXT: call sincosf
-; GNU64-NEXT: ld [%fp+2027], %f1
-; GNU64-NEXT: ld [%fp+2035], %f0
+; GNU64-NEXT: nop
+; GNU64-NEXT: call __truncsfhf2
+; GNU64-NEXT: ld [%fp+2035], %f1
+; GNU64-NEXT: mov %o0, %i0
+; GNU64-NEXT: call __truncsfhf2
; GNU64-NEXT: ld [%fp+2043], %f1
-; GNU64-NEXT: ld [%fp+2031], %f2
-; GNU64-NEXT: ld [%fp+2039], %f3
+; GNU64-NEXT: mov %o0, %i1
+; GNU64-NEXT: call __truncsfhf2
+; GNU64-NEXT: ld [%fp+2031], %f1
+; GNU64-NEXT: mov %o0, %i2
+; GNU64-NEXT: call __truncsfhf2
+; GNU64-NEXT: ld [%fp+2039], %f1
; GNU64-NEXT: ret
-; GNU64-NEXT: restore
+; GNU64-NEXT: restore %g0, %o0, %o3
%result = call { <2 x half>, <2 x half> } @llvm.sincos.v2f16(<2 x half> %a)
ret { <2 x half>, <2 x half> } %result
}
More information about the llvm-commits
mailing list