[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized FP double <-> single conversion (PR #179926)
Simon Tatham via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Feb 5 04:03:34 PST 2026
https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/179926
This commit provides assembly versions of the conversions both ways between double and float.
>From 35dd800cbe1eb4d571c47254530ee75e7b98f500 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Thu, 29 Jan 2026 16:12:53 +0000
Subject: [PATCH] [compiler-rt][ARM] Optimized FP double <-> single conversion
This commit provides assembly versions of the conversions both ways
between double and float.
---
compiler-rt/lib/builtins/CMakeLists.txt | 2 +
compiler-rt/lib/builtins/arm/extendsfdf2.S | 195 ++++++++++
compiler-rt/lib/builtins/arm/truncdfsf2.S | 198 ++++++++++
.../test/builtins/Unit/extendsfdf2new_test.c | 123 ++++++
.../test/builtins/Unit/truncdfsf2new_test.c | 367 ++++++++++++++++++
5 files changed, 885 insertions(+)
create mode 100644 compiler-rt/lib/builtins/arm/extendsfdf2.S
create mode 100644 compiler-rt/lib/builtins/arm/truncdfsf2.S
create mode 100644 compiler-rt/test/builtins/Unit/extendsfdf2new_test.c
create mode 100644 compiler-rt/test/builtins/Unit/truncdfsf2new_test.c
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 0c53781a51392..6b392c8eb22f0 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -456,6 +456,8 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
arm/gesf2.S
arm/unorddf2.S
arm/unordsf2.S
+ arm/extendsfdf2.S
+ arm/truncdfsf2.S
)
set_source_files_properties(${assembly_files}
PROPERTIES COMPILE_OPTIONS ${implicit_it_flag})
diff --git a/compiler-rt/lib/builtins/arm/extendsfdf2.S b/compiler-rt/lib/builtins/arm/extendsfdf2.S
new file mode 100644
index 0000000000000..21518d4a75b1a
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/extendsfdf2.S
@@ -0,0 +1,195 @@
+//===-- extendsfdf2.S - single- to double-precision FP conversion ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the __extendsfdf2 function (single to double precision
+// floating point conversion) for the Arm and Thumb2 ISAs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+#include "endian.h"
+
+ .syntax unified
+ .text
+ .p2align 2
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__extendsfdf2)
+ push {r4, lr}
+ vmov r0, s0
+ bl __aeabi_f2d
+ VMOV_TO_DOUBLE(d0, r0, r1)
+ pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__extendsfdf2, __aeabi_f2d)
+#endif
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_f2d)
+
+ // Start with the fast path, dealing with normalized single-precision inputs.
+ // We handle these as quickly as possible in straight-line code, and branch
+ // out of line to a single 'handle everything else' label which will have to
+ // figure out what kind of unusual thing has happened.
+
+ // Extend the exponent field by 3 bits, by shifting the sign bit off the top
+ // of r0 into the carry flag, shifting the rest of the input word right by 3,
+ // then using RRX to put the sign back. So we end up with a word shaped like
+ // the top half of a double, but the exponent field is still biased by the
+ // single-precision offset of 0x7f instead of the double-precision 0x3ff.
+ lsls r3, r0, #1
+ lsr r12, r3, #3
+ rrx r12, r12
+
+ // For a normalized number, the remaining steps are to rebias the exponent,
+ // recover the remaining 3 mantissa bits from r0 which aren't included in the
+ // word we've just made, and move both into the right output registers.
+ //
+ // But we must also check for the difficult cases. These occur when the input
+ // exponent is either 0 or 0xFF. Those two values can be identified by the
+ // property that exp XOR (exp << 1) has the top 7 bits all zero.
+
+ // Do the test for uncommon values. Instead of using a shifter operand in the
+ // obvious way (EOR output, r0, r0, lsl #1), we use the fact that the setup
+ // code above already has a shifted-left copy of the input word in r3. In
+ // Thumb, this makes the EORS a 16-bit instruction instead of 32-bit.
+ eors r3, r3, r0
+
+ // Now prepare the output, for normal inputs.
+ //
+ // We make this pair of instructions conditional on NE, i.e. we skip it if r3
+ // and r0 were actually equal (which could only happen if r0 was 0, i.e. the
+ // input was +0). This is fine, because in that situation the input wasn't
+ // normalized, so we aren't going to return this output anyway.
+ //
+ // The _point_ of conditionalizing these two instructions is that this way we
+ // have only one IT instruction on the fast path, and it's _here_, where this
+ // comment is, so that it comes immediately after the above 16-bit EORS and
+ // can be executed in the same cycle by Cortex-M3.
+ lslne xl, r0, #29 // xl now has the bottom 3 input mantissa bits
+ addne xh, r12, #(0x3ff - 0x7f) << 20 // rebias exponent in xh
+
+ // Finally, check whether the test word in r3 has its top 7 exponent bits
+ // zero. If not, we can return the fast-path answer.
+ tstne r3, #0x7f000000
+ bxne lr
+
+ // Now we've handled the fast-path cases as fast as we know how, what do we
+ // do next? We almost certainly don't have the input value in r0 any more,
+ // because we overwrote it by writing an unused output to xh:xl in the above
+ // code. Worse, we didn't _reliably_ overwrite it, because those writes to
+ // xh:xl might not have happened if the whole test word in r3 was zero. So
+ // where can we find the input bits?
+ //
+ // We have r3 = input XOR (input << 1). That's actually an invertible
+ // transformation, so in principle we could recover the full original input
+ // float from just r3. The quickest way to do that involves these five
+ // instructions (in any order, since they commute):
+ //
+ // EOR r3, r3, r3, lsl #16
+ // EOR r3, r3, r3, lsl #8
+ // EOR r3, r3, r3, lsl #4
+ // EOR r3, r3, r3, lsl #2
+ // EOR r3, r3, r3, lsl #1
+ //
+ // But that's rather slow, and we can do better. r12 contains most of the
+ // input bits in a more usable form: we inserted three zero bits between the
+ // sign and the top of the exponent, but everything from the input is there
+ // _somewhere_, except for the low 3 bits.
+ //
+ // However, on one code path below we'll use a subset of those EOR
+ // instructions to recover the low 3 bits of the input.
+
+ // First, find out whether the input exponent was 0 (zero or denormal), or
+ // 0xFF (infinity or NaN). We know it was one of the two, or we would have
+ // taken the early return from the fast path. So it's enough to test any
+ // single bit of the exponent in r12.
+ tst r12, #1<<27 // bit 27 is topmost bit of the 8-bit exponent
+ bne LOCAL_LABEL(inf_or_nan)
+
+ // If we didn't take that branch, we have a denormal or zero. Zeroes are
+ // likely to be common, so we'd prefer to handle those with highest priority.
+ //
+ // r3 = (input XOR (input << 1)) will take the values 0 or 0x80000000 for a
+ // zero input. So it contains precisely the right value to return in xh.
+ //
+ // The BICS here combines the zeroing of xl with the test of r3, because it
+ // sets Z if and only if the input was one of those two values, and if so,
+ // sets xl=0.
+ //
+ // Unfortunately this has the side effect of clobbering xl in the case where
+ // we _don't_ take the early return, so now we've lost our verbatim copy of
+ // the low 3 input bits! On the denormal-handling path we'll have to recover
+ // those from r3 more awkwardly. But denormal handling is rare, and slow
+ // anyway, so it's worth the awkwardness to save a cycle in the much more
+ // common case of a zero input.
+ bics xl, r3, #0x80000000 // EQ if output is zero
+ moveq xh, r3 // if so, copy input sign into xh
+ bxeq lr // and return
+
+ // Now we know we're dealing with a denormal, so we need to recover the whole
+ // input mantissa. Most of it is in r12, but those last three bits now need
+ // to be reconstructed from r3 by using part of the shift+EOR trick shown
+ // above. We only need the left shifts by 1 and by 2, because the other three
+ // don't affect the bottom 3 bits at all.
+ eor r3, r3, r3, lsl #2
+ eor r3, r3, r3, lsl #1
+ and r3, r3, #7
+
+ // Now r3 contains just the low bits of the mantissa. The rest of the
+ // mantissa is in r12, shifted right by 3 bits, so this instruction rebuilds
+ // the entire input mantissa in xh. (The exponent field is known to be zero,
+ // and the sign bit at the top of r12 is discarded by the left shift.)
+ orr xh, r3, r12, lsl #3
+
+ // Renormalize that input mantissa so that its high bit is at the top of the
+ // word.
+ clz r2, xh
+ lsl xh, xh, r2
+
+ // Compute the right sign + exponent to go with that mantissa.
+ //
+ // If the input mantissa had had only its low bit set, then the input float
+ // would be 2^-149, which has a double-precision exponent of 0x36a. In that
+ // situation we'd have r2 = 31 (output from the CLZ). So we need the output
+ // exponent to be (0x389 - r2). But the leading bit of the mantissa will
+ // increment the exponent field when we add them together, so in fact we want
+ // to calculate (0x388 - r2). That's particularly convenient, because 0x388
+ // fits in an AArch32 immediate field!
+ and r3, r12, #0x80000000 // get the sign bit from the top of r12
+ add r3, r3, #0x388 << 20 // add the exponent bias as calculated above
+ sub r3, r3, r2, lsl #20 // subtract the CLZ output
+
+ // Finally, distribute the normalized mantissa across the two output words,
+ // and combine the top half with the exponent we just computed.
+ lsls xl, xh, #21 // low word = low 3 bits of normalized mantissa
+ add xh, r3, xh, lsr #11 // high word = sign + exp + rest of mantissa
+ bx lr
+
+LOCAL_LABEL(inf_or_nan):
+ // We come here if the input was either infinity or a NaN. In this situation
+ // we can be sure that the instructions that set up the fast-path return
+ // value _did_ happen, because the input was nonzero. Also we branched away
+ // before the test for a zero input clobbered xl.
+ //
+ // So xh:xl will contain what _would_ be the right output value if 0xFF were
+ // not a special input: the exponent field will be 0x47f, and the sign and
+ // mantissa will be in place.
+ //
+ // This is almost exactly what we really want to return, except for two
+ // things: the exponent should be corrected to 0x7ff for an output infinity
+ // or NaN, and if the mantissa is nonzero at all (so that we're returning a
+ // NaN and not an infinity) then we should set its top bit to make it a quiet
+ // NaN.
+ orrs xh, xh, #0x7f000000 // set the missing bits in the exponent field
+ orrs r2, xl, xh, lsl #12 // is any bit of the mantissa set?
+ orrne xh, xh, #0x00080000 // if so, set the top mantissa bit
+ bx lr
+
+END_COMPILERRT_FUNCTION(__aeabi_f2d)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/truncdfsf2.S b/compiler-rt/lib/builtins/arm/truncdfsf2.S
new file mode 100644
index 0000000000000..d87fce8d1bcbb
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/truncdfsf2.S
@@ -0,0 +1,198 @@
+//===-- truncdfsf2.S - double- to single-precision FP conversion ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the __truncdfsf2 function (double to single precision
+// floating point conversion), with the IEEE-754 default rounding (to nearest,
+// ties to even), for the Arm and Thumb2 ISAs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+#include "endian.h"
+
+ .syntax unified
+ .text
+ .p2align 2
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__truncdfsf2)
+ push {r4, lr}
+ VMOV_FROM_DOUBLE(r0, r1, d0)
+ bl __aeabi_d2f
+ vmov s0, r0
+ pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__truncdfsf2, __aeabi_d2f)
+#endif
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_d2f)
+
+ // Start with the fast path, dealing with input values that give a normalized
+ // single-precision output. We handle these as quickly as possible in
+ // straight-line code, and branch out of line to a single 'handle everything
+ // else' label which will have to figure out what kind of unusual thing has
+ // happened.
+
+ // Split xh into the sign bit (in r3) and everything else (r2), so that we
+ // can change the width of the exponent field and then put the sign back on
+ // later.
+ bic r2, xh, #0x80000000
+ and r3, xh, #0x80000000
+
+ // Rebias the exponent, still in its double-precision location, to account
+ // for the difference between double- and single-precision exponents.
+ sub r2, r2, #(0x3ff-0x7f) << 20
+
+ // If the exponent field is now 0 or less, we have an underflow or an exact
+ // zero. If it's 0xFF or more, we have an overflow, or a NaN or infinity as
+ // input. Detect all of those in a combined test, and branch out of line.
+ cmp r2, #0x00100000 // LO if output too small
+ rsbshs r12, r2, #0x0ff00000 // otherwise, set LS if output too large
+ bls LOCAL_LABEL(uncommon) // so now LS means one or the other happened
+
+ // We've disposed of all the uncommon cases, so we know we're returning a
+ // normalized float, but we might still need to round it. Shift the round bit
+ // into the C flag, also setting Z if everything below that is zero.
+ lsls r12, xl, #4
+
+ // Put the result back together, by recombining the sign (in r3) with the
+ // exponent and top of the mantissa (in r2, needing to be shifted left 3
+ // bits), plus the top 3 bits of xl. The last of those is put on with an ADC
+ // instruction, which also rounds up if the bit we just shifted into C was
+ // set.
+ orr r2, r3, r2, lsl #3 // sign + exponent + most of mantissa
+ adc r0, r2, xl, lsr #29 // low 3 bits of mantissa + maybe round up
+
+ // If C=1 and Z=1, we need to round to even. Otherwise we're finished. So we
+ // conditionally return based on one of those flags, then clear the low
+ // output bit based on the other.
+ //
+ // Which way round? On the assumption that input mantissas are roughly
+ // uniformly distributed, _almost all_ input doubles will contain a 1 bit
+ // somewhere in the bottom 28 bits, so we return early in the vast majority
+ // of cases by testing Z first. If we tested C first, we'd expect to return
+ // early only half the time, costing two extra instructions half the time
+ // instead of 1/2^28 of the time.
+ //
+ // (That's a bit optimistic, because of course in some situations input
+ // mantissas _won't_ be that uniform. In particular, if you converted from a
+ // float, did a small amount of calculation in double, and converted back,
+ // the round-to-even case might come up more often. But at least _some_
+ // applications will be passing doubles that make use of the whole mantissa,
+ // so I think this is still the more sensible way round to do the test.)
+ bxne lr // return if Z=0
+ biccs r0, r0, #1 // Z=1, so round to even if C=1 too
+ bx lr // and now return unconditionally
+
+LOCAL_LABEL(uncommon):
+ // We come here if anything at all goes wrong on the fast path. We could have
+ // an interesting kind of input - zero, denormal, infinity or NaN - or we
+ // could have a normalized double-precision input too large or too small to
+ // yield a normalized single-precision output.
+ //
+ // Of the various cases, the most important one to handle quickly is a zero
+ // input, because those are probably fairly common. So the very first thing
+ // we do is test if the input is zero, and if so, return the same sign of
+ // zero by simply using xh as the return value.
+ orrs r12, xl, xh, lsl #1 // are all bits of xh:xl 0 except the sign bit?
+
+#ifndef __BIG_ENDIAN__
+ // In little-endian, xh (containing the desired sign bit) and r0 (the output
+ // register) aren't the same. This instruction can be skipped in big-endian,
+ // where the correct output value is already in r0.
+ moveq r0, xh
+#endif
+ bxeq lr
+
+ // Separate the remaining cases into three types: too small (underflow,
+ // whether or not the input was a denormal), too big (overflow or input
+ // infinity, which we treat the same in the absence of FP exceptions), and
+ // NaN.
+ //
+ // At this stage r2 contains the output exponent, rebiased to its
+ // single-precision value, but at bit 20 (that is, still in the
+ // double-precision position). Detect underflow by doing a signed comparison
+ // against the minimum normalized single-precision exponent.
+ cmp r2, #0x00100000
+ blt LOCAL_LABEL(underflow)
+
+ // Now figure out whether we had a NaN as input, by shifting xh left by a bit
+ // (discarding the sign) and setting the new low bit if xl != 0. This gives a
+ // value which is greater than 0xFFE00000 (in an unsigned comparison) for
+ // precisely NaN inputs.
+ cmp xl, #1 // set C if xl != 0
+ adc r12, xh, xh // shift that in to the bottom of xh
+ cmn r12, #0x00200000 // is the result > 0xFFE00000?
+ bhi LOCAL_LABEL(nan) // if so, go and handle a NaN
+
+ // If we're still here, we have a finite overflow, or an input infinity. We
+ // don't have to figure out which: we return an infinity of the appropriate
+ // sign in both cases. So keep just the sign of xh, and make an infinity out
+ // of the rest of the bits.
+ mvn r0, xh, lsr #31 // shift sign bit down to bit 0 and flip it
+ mvn r0, r0, lsl #8 // flip it back, putting 8 set bits below it
+ lsl r0, r0, #23 // and shift those 9 bits back up to the top
+ bx lr
+
+LOCAL_LABEL(nan):
+ // We have a double-precision NaN input. The Arm NaN handling rules say that
+ // we make the output single-precision NaN by keeping the sign and as much of
+ // the mantissa as possible (starting from the top bit). But we also set the
+ // top bit of the mantissa, which makes the output NaN quiet even if the
+ // input one was signaling.
+ //
+ // So this code looks a bit like a miniature version of the fast path: we
+ // keep the bottom 8 bits of the exponent in xh as the output exponent (we
+ // know it's all 1s, which is what we want), plus all the mantissa bits below
+ // it; shift all of that 3 bits left and recombine with the sign; then
+ // combine with the top 3 bits of xl. Finally, set the top mantissa bit.
+ bic r2, xh, #0xF0000000 // everything from xh we want to shift left
+ orr r0, r3, xl, lsr #29 // combine sign with low 3 output mantissa bits
+ orr r0, r0, r2, lsl #3 // combine that with the shifted-up value in r2
+ orr r0, r0, #0x00400000 // set the top mantissa bit to make it a QNaN
+ bx lr
+
+LOCAL_LABEL(underflow):
+ // We have an input value small enough to underflow. The basic strategy is to
+ // leave __funder to deal with the details.
+ //
+ // Normally __funder expects to get a value that's already been rounded, and
+ // will re-round it, for which it also needs to know which way the value has
+ // been rounded already. In this case we haven't rounded _yet_. Rather than
+ // carefully rounding to nearest, it's easier to just make the __funder input
+ // value by truncating the mantissa (i.e. round towards zero), and set the
+ // rounding direction accordingly.
+
+ // Rebias the exponent (again) to make an IEEE 754 underflow intermediate. If
+ // this still doesn't make r2 positive, then the result is so small that it
+ // will underflow to 0 anyway, so it doesn't really matter what exponent we
+ // do provide - we just clear the top 8 bits of r2 to ensure the sign is
+ // right and the exponent is _something_ small.
+ adds r2, r2, #0x0c000000 // exponent bias (still shifted down 3 bits)
+ bicmi r2, r2, #0xff000000 // handle exponent still being negative
+
+ // Test the bits we're going to shift off the mantissa, to see if any are
+ // nonzero. This will determine the rounding direction we pass to __funder,
+ // because although we never round _up_ on this path, we must still tell it
+ // whether the value we pass it was rounded down or was already exact.
+ lsls r12, xl, #3 // set Z if the intermediate value is exact
+
+ // Put together the intermediate value to pass to __funder.
+ orr r2, r3, r2, lsl #3 // sign + exponent + most of mantissa
+ orr r0, r2, xl, lsr #29 // combine with top 3 bits of xl
+
+ // Set the rounding direction flag based on the test above.
+ moveq r1, #0 // intermediate is exact
+ movne r1, #1 // intermediate is too small (we didn't round)
+
+ // And tailcall __funder to do the rest of the job.
+ b SYMBOL_NAME(__compiler_rt_funder)
+
+END_COMPILERRT_FUNCTION(__aeabi_d2f)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c b/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c
new file mode 100644
index 0000000000000..04446488f73bf
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c
@@ -0,0 +1,123 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_extendsfdf2
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+// By default this test uses compareResultD to check the returned doubles, which
+// accepts any returned NaN if the expected result is the canonical NaN value
+// 0x7ff8000000000000. For the Arm optimized FP implementation, which commits
+// to a more detailed handling of NaNs, we tighten up the check and include
+// some extra test cases specific to that NaN policy.
+#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP
+# define EXPECT_EXACT_RESULTS
+# define ARM_NAN_HANDLING
+#endif
+
+// Returns: a converted from float to double
+COMPILER_RT_ABI double __extendsfdf2(float a);
+
+int test__extendsfdf2(int line, uint32_t a_rep, uint64_t expected_rep) {
+ float a = fromRep32(a_rep);
+ double x = __extendsfdf2(a);
+#ifdef EXPECT_EXACT_RESULTS
+ int ret = toRep64(x) != expected_rep;
+#else
+ int ret = compareResultD(x, expected_rep);
+#endif
+
+ if (ret) {
+ printf("error at line %d: __extendsfdf2(%08" PRIx32 ") = %016" PRIx64
+ ", expected %016" PRIx64 "\n",
+ line, a_rep, toRep64(x), expected_rep);
+ }
+ return ret;
+}
+
+#define test__extendsfdf2(a,x) test__extendsfdf2(__LINE__,a,x)
+
+int main(void) {
+ int status = 0;
+
+ status |= test__extendsfdf2(0x00000001, 0x36a0000000000000);
+ status |= test__extendsfdf2(0x00000003, 0x36b8000000000000);
+ status |= test__extendsfdf2(0x00000005, 0x36c4000000000000);
+ status |= test__extendsfdf2(0x00000009, 0x36d2000000000000);
+ status |= test__extendsfdf2(0x00000011, 0x36e1000000000000);
+ status |= test__extendsfdf2(0x00000021, 0x36f0800000000000);
+ status |= test__extendsfdf2(0x00000041, 0x3700400000000000);
+ status |= test__extendsfdf2(0x00000081, 0x3710200000000000);
+ status |= test__extendsfdf2(0x00000101, 0x3720100000000000);
+ status |= test__extendsfdf2(0x00000201, 0x3730080000000000);
+ status |= test__extendsfdf2(0x00000401, 0x3740040000000000);
+ status |= test__extendsfdf2(0x00000801, 0x3750020000000000);
+ status |= test__extendsfdf2(0x00001001, 0x3760010000000000);
+ status |= test__extendsfdf2(0x00002001, 0x3770008000000000);
+ status |= test__extendsfdf2(0x00004001, 0x3780004000000000);
+ status |= test__extendsfdf2(0x00008001, 0x3790002000000000);
+ status |= test__extendsfdf2(0x00010001, 0x37a0001000000000);
+ status |= test__extendsfdf2(0x00020001, 0x37b0000800000000);
+ status |= test__extendsfdf2(0x00040001, 0x37c0000400000000);
+ status |= test__extendsfdf2(0x00080001, 0x37d0000200000000);
+ status |= test__extendsfdf2(0x00100001, 0x37e0000100000000);
+ status |= test__extendsfdf2(0x00200001, 0x37f0000080000000);
+ status |= test__extendsfdf2(0x00400001, 0x3800000040000000);
+ status |= test__extendsfdf2(0x00800001, 0x3810000020000000);
+ status |= test__extendsfdf2(0x01000001, 0x3820000020000000);
+ status |= test__extendsfdf2(0x20000001, 0x3c00000020000000);
+ status |= test__extendsfdf2(0x30000001, 0x3e00000020000000);
+ status |= test__extendsfdf2(0x3f800000, 0x3ff0000000000000);
+ status |= test__extendsfdf2(0x7f000000, 0x47e0000000000000);
+ status |= test__extendsfdf2(0x7f7fffff, 0x47efffffe0000000);
+ status |= test__extendsfdf2(0x7f800000, 0x7ff0000000000000);
+ status |= test__extendsfdf2(0xff000000, 0xc7e0000000000000);
+ status |= test__extendsfdf2(0xff7fffff, 0xc7efffffe0000000);
+ status |= test__extendsfdf2(0xff800000, 0xfff0000000000000);
+ status |= test__extendsfdf2(0x80800000, 0xb810000000000000);
+ status |= test__extendsfdf2(0x807fffff, 0xb80fffffc0000000);
+ status |= test__extendsfdf2(0x80400000, 0xb800000000000000);
+ status |= test__extendsfdf2(0x803fffff, 0xb7ffffff80000000);
+ status |= test__extendsfdf2(0x80000003, 0xb6b8000000000000);
+ status |= test__extendsfdf2(0x80000002, 0xb6b0000000000000);
+ status |= test__extendsfdf2(0x80000001, 0xb6a0000000000000);
+ status |= test__extendsfdf2(0x80000000, 0x8000000000000000);
+
+ // Test that the result of an operation is a NaN at all when it should be.
+ //
+ // In most configurations these tests' results are checked using
+ // compareResultD, so we set all the answers to the canonical NaN
+ // 0x7ff8000000000000, which causes compareResultD to accept any NaN
+ // encoding. We also use the same value as the input NaN in tests that have
+ // one, so that even in EXPECT_EXACT_RESULTS mode these tests should pass,
+ // because 0x7ff8000000000000 is still the exact expected NaN.
+ status |= test__extendsfdf2(0x7fc00000, 0x7ff8000000000000);
+
+#ifdef ARM_NAN_HANDLING
+ // Tests specific to the NaN handling of Arm hardware, mimicked by
+ // arm/extendsfdf2.S:
+ //
+ // - a quiet NaN is distinguished by the top mantissa bit being 1
+ //
+ // - converting a quiet NaN from float to double is done by copying
+ // the input mantissa bits to the top of the output mantissa and
+ // appending 0 bits below them
+ //
+ // - if the input is a signalling NaN, its top mantissa bit is set
+ // to turn it quiet, and then that quiet NaN is converted to
+ // double as above
+ status |= test__extendsfdf2(0x7faf53b1, 0x7ffdea7620000000);
+ status |= test__extendsfdf2(0x7fe111d3, 0x7ffc223a60000000);
+ status |= test__extendsfdf2(0xffaf53b1, 0xfffdea7620000000);
+ status |= test__extendsfdf2(0xffe111d3, 0xfffc223a60000000);
+
+#endif // ARM_NAN_HANDLING
+
+ return status;
+}
diff --git a/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c b/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c
new file mode 100644
index 0000000000000..0542f97643618
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c
@@ -0,0 +1,367 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_truncdfsf2
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+// By default this test uses compareResultF to check the returned floats, which
+// accepts any returned NaN if the expected result is the canonical NaN value
+// 0x7fc00000. For the Arm optimized FP implementation, which commits to a more
+// detailed handling of NaNs, we tighten up the check and include some extra
+// test cases specific to that NaN policy.
+#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP
+# define EXPECT_EXACT_RESULTS // compare result bit patterns exactly
+# define ARM_NAN_HANDLING // enable the Arm-specific NaN test cases
+#endif // __arm__, excluding Thumb without Thumb-2, with optimized FP enabled
+
+// Returns: the argument a, converted from double to float.
+COMPILER_RT_ABI float __truncdfsf2(double a);
+
+// Convert a_rep via __truncdfsf2 and report (tagged with `line`) any mismatch.
+int test__truncdfsf2(int line, uint64_t a_rep, uint32_t expected_rep) {
+  const float got = __truncdfsf2(fromRep64(a_rep));
+#ifdef EXPECT_EXACT_RESULTS
+  const int failed = (toRep32(got) != expected_rep); // compare raw bits
+#else
+  const int failed = compareResultF(got, expected_rep);
+#endif
+  if (!failed)
+    return 0;
+
+  printf("error at line %d: __truncdfsf2(%016" PRIx64 ") = %08" PRIx32
+         ", expected %08" PRIx32 "\n",
+         line, a_rep, toRep32(got), expected_rep);
+  return failed;
+}
+// Wrapper macro: callers pass only (input, expected); __LINE__ is injected.
+#define test__truncdfsf2(a, x) test__truncdfsf2(__LINE__, a, x)
+
+int main(void) {
+ int status = 0;
+ // Non-NaN inputs, covering underflow, denormal results, rounding and overflow.
+ status |= test__truncdfsf2(0x0000000000000001, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000002, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000004, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000008, 0x00000000);
+ status |= test__truncdfsf2(0x000000000000001a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000020, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000040, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000080, 0x00000000);
+ status |= test__truncdfsf2(0x000000000000019a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000200, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000400, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000000800, 0x00000000);
+ status |= test__truncdfsf2(0x000000000000189a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000002000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000004000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000008000, 0x00000000);
+ status |= test__truncdfsf2(0x000000000001789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000020000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000040000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000080000, 0x00000000);
+ status |= test__truncdfsf2(0x000000000016789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000200000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000400000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000000800000, 0x00000000);
+ status |= test__truncdfsf2(0x000000000156789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000002000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000004000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000008000000, 0x00000000);
+ status |= test__truncdfsf2(0x000000001456789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000020000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000040000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000080000000, 0x00000000);
+ status |= test__truncdfsf2(0x000000013465789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000000200000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000400000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000000800000000, 0x00000000);
+ status |= test__truncdfsf2(0x000000123456789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000002000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000004000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000008000000000, 0x00000000);
+ status |= test__truncdfsf2(0x000001123456789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000020000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000040000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000080000000000, 0x00000000);
+ status |= test__truncdfsf2(0x000010123456789a, 0x00000000);
+ status |= test__truncdfsf2(0x0000200000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000400000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0000800000000000, 0x00000000);
+ status |= test__truncdfsf2(0x000100123456789a, 0x00000000);
+ status |= test__truncdfsf2(0x0002000000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0004000000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0008000000000000, 0x00000000);
+ status |= test__truncdfsf2(0x0010000000000000, 0x00000000);
+ status |= test__truncdfsf2(0x36a0000000000000, 0x00000001);
+ status |= test__truncdfsf2(0x36b0000000000000, 0x00000002);
+ status |= test__truncdfsf2(0x36b2000000000000, 0x00000002);
+ status |= test__truncdfsf2(0x36b4000000000000, 0x00000002);
+ status |= test__truncdfsf2(0x36b6000000000000, 0x00000003);
+ status |= test__truncdfsf2(0x36b8000000000000, 0x00000003);
+ status |= test__truncdfsf2(0x36ba000000000000, 0x00000003);
+ status |= test__truncdfsf2(0x36bc000000000000, 0x00000004);
+ status |= test__truncdfsf2(0x36be000000000000, 0x00000004);
+ status |= test__truncdfsf2(0x36c0000000000000, 0x00000004);
+ status |= test__truncdfsf2(0x36c1000000000000, 0x00000004);
+ status |= test__truncdfsf2(0x36c2000000000000, 0x00000004);
+ status |= test__truncdfsf2(0x36c3000000000000, 0x00000005);
+ status |= test__truncdfsf2(0x36c4000000000000, 0x00000005);
+ status |= test__truncdfsf2(0x36c5000000000000, 0x00000005);
+ status |= test__truncdfsf2(0x36c6000000000000, 0x00000006);
+ status |= test__truncdfsf2(0x36c7000000000000, 0x00000006);
+ status |= test__truncdfsf2(0x36d0000000000000, 0x00000008);
+ status |= test__truncdfsf2(0x36d0800000000000, 0x00000008);
+ status |= test__truncdfsf2(0x36d1000000000000, 0x00000008);
+ status |= test__truncdfsf2(0x36d1800000000000, 0x00000009);
+ status |= test__truncdfsf2(0x36d2000000000000, 0x00000009);
+ status |= test__truncdfsf2(0x36d2800000000000, 0x00000009);
+ status |= test__truncdfsf2(0x36d3000000000000, 0x0000000a);
+ status |= test__truncdfsf2(0x36d3800000000000, 0x0000000a);
+ status |= test__truncdfsf2(0x36e0000000000000, 0x00000010);
+ status |= test__truncdfsf2(0x36e0400000000000, 0x00000010);
+ status |= test__truncdfsf2(0x36e0800000000000, 0x00000010);
+ status |= test__truncdfsf2(0x36e0c00000000000, 0x00000011);
+ status |= test__truncdfsf2(0x36e1000000000000, 0x00000011);
+ status |= test__truncdfsf2(0x36e1400000000000, 0x00000011);
+ status |= test__truncdfsf2(0x36e1800000000000, 0x00000012);
+ status |= test__truncdfsf2(0x36e1c00000000000, 0x00000012);
+ status |= test__truncdfsf2(0x36f0000000000000, 0x00000020);
+ status |= test__truncdfsf2(0x36f0200000000000, 0x00000020);
+ status |= test__truncdfsf2(0x36f0400000000000, 0x00000020);
+ status |= test__truncdfsf2(0x36f0600000000000, 0x00000021);
+ status |= test__truncdfsf2(0x36f0800000000000, 0x00000021);
+ status |= test__truncdfsf2(0x36f0a00000000000, 0x00000021);
+ status |= test__truncdfsf2(0x36f0c00000000000, 0x00000022);
+ status |= test__truncdfsf2(0x36f0e00000000000, 0x00000022);
+ status |= test__truncdfsf2(0x3700000000000000, 0x00000040);
+ status |= test__truncdfsf2(0x3700100000000000, 0x00000040);
+ status |= test__truncdfsf2(0x3700200000000000, 0x00000040);
+ status |= test__truncdfsf2(0x3700300000000000, 0x00000041);
+ status |= test__truncdfsf2(0x3700400000000000, 0x00000041);
+ status |= test__truncdfsf2(0x3700500000000000, 0x00000041);
+ status |= test__truncdfsf2(0x3700600000000000, 0x00000042);
+ status |= test__truncdfsf2(0x3700700000000000, 0x00000042);
+ status |= test__truncdfsf2(0x3710000000000000, 0x00000080);
+ status |= test__truncdfsf2(0x3710080000000000, 0x00000080);
+ status |= test__truncdfsf2(0x3710100000000000, 0x00000080);
+ status |= test__truncdfsf2(0x3710180000000000, 0x00000081);
+ status |= test__truncdfsf2(0x3710200000000000, 0x00000081);
+ status |= test__truncdfsf2(0x3710280000000000, 0x00000081);
+ status |= test__truncdfsf2(0x3710300000000000, 0x00000082);
+ status |= test__truncdfsf2(0x3710380000000000, 0x00000082);
+ status |= test__truncdfsf2(0x3720000000000000, 0x00000100);
+ status |= test__truncdfsf2(0x3720040000000000, 0x00000100);
+ status |= test__truncdfsf2(0x3720080000000000, 0x00000100);
+ status |= test__truncdfsf2(0x37200c0000000000, 0x00000101);
+ status |= test__truncdfsf2(0x3720100000000000, 0x00000101);
+ status |= test__truncdfsf2(0x3720140000000000, 0x00000101);
+ status |= test__truncdfsf2(0x3720180000000000, 0x00000102);
+ status |= test__truncdfsf2(0x37201c0000000000, 0x00000102);
+ status |= test__truncdfsf2(0x3730000000000000, 0x00000200);
+ status |= test__truncdfsf2(0x3730020000000000, 0x00000200);
+ status |= test__truncdfsf2(0x3730040000000000, 0x00000200);
+ status |= test__truncdfsf2(0x3730060000000000, 0x00000201);
+ status |= test__truncdfsf2(0x3730080000000000, 0x00000201);
+ status |= test__truncdfsf2(0x37300a0000000000, 0x00000201);
+ status |= test__truncdfsf2(0x37300c0000000000, 0x00000202);
+ status |= test__truncdfsf2(0x37300e0000000000, 0x00000202);
+ status |= test__truncdfsf2(0x3740000000000000, 0x00000400);
+ status |= test__truncdfsf2(0x3740010000000000, 0x00000400);
+ status |= test__truncdfsf2(0x3740020000000000, 0x00000400);
+ status |= test__truncdfsf2(0x3740030000000000, 0x00000401);
+ status |= test__truncdfsf2(0x3740040000000000, 0x00000401);
+ status |= test__truncdfsf2(0x3740050000000000, 0x00000401);
+ status |= test__truncdfsf2(0x3740060000000000, 0x00000402);
+ status |= test__truncdfsf2(0x3740070000000000, 0x00000402);
+ status |= test__truncdfsf2(0x3750000000000000, 0x00000800);
+ status |= test__truncdfsf2(0x3750008000000000, 0x00000800);
+ status |= test__truncdfsf2(0x3750010000000000, 0x00000800);
+ status |= test__truncdfsf2(0x3750018000000000, 0x00000801);
+ status |= test__truncdfsf2(0x3750020000000000, 0x00000801);
+ status |= test__truncdfsf2(0x3750028000000000, 0x00000801);
+ status |= test__truncdfsf2(0x3750030000000000, 0x00000802);
+ status |= test__truncdfsf2(0x3750038000000000, 0x00000802);
+ status |= test__truncdfsf2(0x3760000000000000, 0x00001000);
+ status |= test__truncdfsf2(0x3760004000000000, 0x00001000);
+ status |= test__truncdfsf2(0x3760008000000000, 0x00001000);
+ status |= test__truncdfsf2(0x376000c000000000, 0x00001001);
+ status |= test__truncdfsf2(0x3760010000000000, 0x00001001);
+ status |= test__truncdfsf2(0x3760014000000000, 0x00001001);
+ status |= test__truncdfsf2(0x3760018000000000, 0x00001002);
+ status |= test__truncdfsf2(0x376001c000000000, 0x00001002);
+ status |= test__truncdfsf2(0x3770000000000000, 0x00002000);
+ status |= test__truncdfsf2(0x3770002000000000, 0x00002000);
+ status |= test__truncdfsf2(0x3770004000000000, 0x00002000);
+ status |= test__truncdfsf2(0x3770006000000000, 0x00002001);
+ status |= test__truncdfsf2(0x3770008000000000, 0x00002001);
+ status |= test__truncdfsf2(0x377000a000000000, 0x00002001);
+ status |= test__truncdfsf2(0x377000c000000000, 0x00002002);
+ status |= test__truncdfsf2(0x377000e000000000, 0x00002002);
+ status |= test__truncdfsf2(0x3780000000000000, 0x00004000);
+ status |= test__truncdfsf2(0x3780001000000000, 0x00004000);
+ status |= test__truncdfsf2(0x3780002000000000, 0x00004000);
+ status |= test__truncdfsf2(0x3780003000000000, 0x00004001);
+ status |= test__truncdfsf2(0x3780004000000000, 0x00004001);
+ status |= test__truncdfsf2(0x3780005000000000, 0x00004001);
+ status |= test__truncdfsf2(0x3780006000000000, 0x00004002);
+ status |= test__truncdfsf2(0x3780007000000000, 0x00004002);
+ status |= test__truncdfsf2(0x3790000000000000, 0x00008000);
+ status |= test__truncdfsf2(0x3790000800000000, 0x00008000);
+ status |= test__truncdfsf2(0x3790001000000000, 0x00008000);
+ status |= test__truncdfsf2(0x3790001800000000, 0x00008001);
+ status |= test__truncdfsf2(0x3790002000000000, 0x00008001);
+ status |= test__truncdfsf2(0x3790002800000000, 0x00008001);
+ status |= test__truncdfsf2(0x3790003000000000, 0x00008002);
+ status |= test__truncdfsf2(0x3790003800000000, 0x00008002);
+ status |= test__truncdfsf2(0x37a0000000000000, 0x00010000);
+ status |= test__truncdfsf2(0x37a0000400000000, 0x00010000);
+ status |= test__truncdfsf2(0x37a0000800000000, 0x00010000);
+ status |= test__truncdfsf2(0x37a0000c00000000, 0x00010001);
+ status |= test__truncdfsf2(0x37a0001000000000, 0x00010001);
+ status |= test__truncdfsf2(0x37a0001400000000, 0x00010001);
+ status |= test__truncdfsf2(0x37a0001800000000, 0x00010002);
+ status |= test__truncdfsf2(0x37a0001c00000000, 0x00010002);
+ status |= test__truncdfsf2(0x37b0000000000000, 0x00020000);
+ status |= test__truncdfsf2(0x37b0000200000000, 0x00020000);
+ status |= test__truncdfsf2(0x37b0000400000000, 0x00020000);
+ status |= test__truncdfsf2(0x37b0000600000000, 0x00020001);
+ status |= test__truncdfsf2(0x37b0000800000000, 0x00020001);
+ status |= test__truncdfsf2(0x37b0000a00000000, 0x00020001);
+ status |= test__truncdfsf2(0x37b0000c00000000, 0x00020002);
+ status |= test__truncdfsf2(0x37b0000e00000000, 0x00020002);
+ status |= test__truncdfsf2(0x37c0000000000000, 0x00040000);
+ status |= test__truncdfsf2(0x37c0000100000000, 0x00040000);
+ status |= test__truncdfsf2(0x37c0000200000000, 0x00040000);
+ status |= test__truncdfsf2(0x37c0000300000000, 0x00040001);
+ status |= test__truncdfsf2(0x37c0000400000000, 0x00040001);
+ status |= test__truncdfsf2(0x37c0000500000000, 0x00040001);
+ status |= test__truncdfsf2(0x37c0000600000000, 0x00040002);
+ status |= test__truncdfsf2(0x37c0000700000000, 0x00040002);
+ status |= test__truncdfsf2(0x37d0000000000000, 0x00080000);
+ status |= test__truncdfsf2(0x37d0000080000000, 0x00080000);
+ status |= test__truncdfsf2(0x37d0000100000000, 0x00080000);
+ status |= test__truncdfsf2(0x37d0000180000000, 0x00080001);
+ status |= test__truncdfsf2(0x37d0000200000000, 0x00080001);
+ status |= test__truncdfsf2(0x37d0000280000000, 0x00080001);
+ status |= test__truncdfsf2(0x37d0000300000000, 0x00080002);
+ status |= test__truncdfsf2(0x37d0000380000000, 0x00080002);
+ status |= test__truncdfsf2(0x37e0000000000000, 0x00100000);
+ status |= test__truncdfsf2(0x37e0000040000000, 0x00100000);
+ status |= test__truncdfsf2(0x37e0000080000000, 0x00100000);
+ status |= test__truncdfsf2(0x37e00000c0000000, 0x00100001);
+ status |= test__truncdfsf2(0x37e0000100000000, 0x00100001);
+ status |= test__truncdfsf2(0x37e0000140000000, 0x00100001);
+ status |= test__truncdfsf2(0x37e0000180000000, 0x00100002);
+ status |= test__truncdfsf2(0x37e00001c0000000, 0x00100002);
+ status |= test__truncdfsf2(0x37f0000000000000, 0x00200000);
+ status |= test__truncdfsf2(0x37f0000020000000, 0x00200000);
+ status |= test__truncdfsf2(0x37f000003fffffff, 0x00200000);
+ status |= test__truncdfsf2(0x37f0000040000000, 0x00200000);
+ status |= test__truncdfsf2(0x37f0000040000001, 0x00200001);
+ status |= test__truncdfsf2(0x37f0000060000000, 0x00200001);
+ status |= test__truncdfsf2(0x37f0000080000000, 0x00200001);
+ status |= test__truncdfsf2(0x37f00000a0000000, 0x00200001);
+ status |= test__truncdfsf2(0x37f00000bfffffff, 0x00200001);
+ status |= test__truncdfsf2(0x37f00000c0000000, 0x00200002);
+ status |= test__truncdfsf2(0x37f00000c0000001, 0x00200002);
+ status |= test__truncdfsf2(0x37f00000e0000000, 0x00200002);
+ status |= test__truncdfsf2(0x3800000000000000, 0x00400000);
+ status |= test__truncdfsf2(0x3800000010000000, 0x00400000);
+ status |= test__truncdfsf2(0x3800000020000000, 0x00400000);
+ status |= test__truncdfsf2(0x3800000030000000, 0x00400001);
+ status |= test__truncdfsf2(0x3800000040000000, 0x00400001);
+ status |= test__truncdfsf2(0x3800000050000000, 0x00400001);
+ status |= test__truncdfsf2(0x3800000060000000, 0x00400002);
+ status |= test__truncdfsf2(0x3800000070000000, 0x00400002);
+ status |= test__truncdfsf2(0x380fffffffffffff, 0x00800000);
+ status |= test__truncdfsf2(0x3810000000000000, 0x00800000);
+ status |= test__truncdfsf2(0x3810000008000000, 0x00800000);
+ status |= test__truncdfsf2(0x3810000010000000, 0x00800000);
+ status |= test__truncdfsf2(0x3810000018000000, 0x00800001);
+ status |= test__truncdfsf2(0x3810000020000000, 0x00800001);
+ status |= test__truncdfsf2(0x3810000028000000, 0x00800001);
+ status |= test__truncdfsf2(0x3810000030000000, 0x00800002);
+ status |= test__truncdfsf2(0x3810000038000000, 0x00800002);
+ status |= test__truncdfsf2(0x3ff0000000000000, 0x3f800000);
+ status |= test__truncdfsf2(0x3ff0000008000000, 0x3f800000);
+ status |= test__truncdfsf2(0x3ff0000010000000, 0x3f800000);
+ status |= test__truncdfsf2(0x3ff0000018000000, 0x3f800001);
+ status |= test__truncdfsf2(0x3ff0000028000000, 0x3f800001);
+ status |= test__truncdfsf2(0x3ff0000030000000, 0x3f800002);
+ status |= test__truncdfsf2(0x3ff0000038000000, 0x3f800002);
+ status |= test__truncdfsf2(0x4000000000000000, 0x40000000);
+ status |= test__truncdfsf2(0x47efffffe8000000, 0x7f7fffff);
+ status |= test__truncdfsf2(0x47effffff0000000, 0x7f800000);
+ status |= test__truncdfsf2(0x47effffff8000000, 0x7f800000);
+ status |= test__truncdfsf2(0x7fc0000000000000, 0x7f800000);
+ status |= test__truncdfsf2(0x7ff0000000000000, 0x7f800000);
+ status |= test__truncdfsf2(0x8010000000000000, 0x80000000);
+ status |= test__truncdfsf2(0xbff0000008000000, 0xbf800000);
+ status |= test__truncdfsf2(0xbff0000010000000, 0xbf800000);
+ status |= test__truncdfsf2(0xbff0000018000000, 0xbf800001);
+ status |= test__truncdfsf2(0xbff0000028000000, 0xbf800001);
+ status |= test__truncdfsf2(0xbff0000030000000, 0xbf800002);
+ status |= test__truncdfsf2(0xbff0000038000000, 0xbf800002);
+ status |= test__truncdfsf2(0xc024000000000000, 0xc1200000);
+ status |= test__truncdfsf2(0xc7efffffe8000000, 0xff7fffff);
+ status |= test__truncdfsf2(0xc7effffff0000000, 0xff800000);
+ status |= test__truncdfsf2(0xc7effffff8000000, 0xff800000);
+ status |= test__truncdfsf2(0xffc0000000000000, 0xff800000);
+ status |= test__truncdfsf2(0xfff0000000000000, 0xff800000);
+ status |= test__truncdfsf2(0x3780000000000000, 0x00004000);
+ status |= test__truncdfsf2(0xb780000000000000, 0x80004000);
+ status |= test__truncdfsf2(0x0000000080000000, 0x00000000);
+ status |= test__truncdfsf2(0x8000000080000000, 0x80000000);
+ status |= test__truncdfsf2(0x380ffffff0000000, 0x00800000);
+ status |= test__truncdfsf2(0x380fffffd0000000, 0x007fffff);
+ status |= test__truncdfsf2(0x380fffffe8000000, 0x00800000);
+ status |= test__truncdfsf2(0x380fffffc8000000, 0x007fffff);
+ status |= test__truncdfsf2(0xb80ffffff0000000, 0x80800000);
+ status |= test__truncdfsf2(0xb80fffffd0000000, 0x807fffff);
+ status |= test__truncdfsf2(0xb80fffffe8000000, 0x80800000);
+ status |= test__truncdfsf2(0xb80fffffc8000000, 0x807fffff);
+ status |= test__truncdfsf2(0x0000000000000000, 0x00000000);
+ status |= test__truncdfsf2(0x8000000000000000, 0x80000000);
+ status |= test__truncdfsf2(0xc7e0000010000000, 0xff000000);
+
+ // Test that the result of the conversion is a NaN at all when it should be.
+ //
+ // In most configurations the result is checked using compareResultF, so we
+ // set the expected answer to the canonical NaN 0x7fc00000, which causes
+ // compareResultF to accept any NaN encoding. The input is the canonical
+ // double NaN 0x7ff8000000000000, so that even in EXPECT_EXACT_RESULTS mode
+ // this test should pass, because keeping the top mantissa bits of that input
+ // gives exactly 0x7fc00000.
+ status |= test__truncdfsf2(0x7ff8000000000000, 0x7fc00000);
+
+#ifdef ARM_NAN_HANDLING
+ // Tests specific to the NaN handling of Arm hardware, mimicked by
+ // arm/truncdfsf2.S:
+ //
+ // - a quiet NaN is distinguished by the top mantissa bit being 1
+ //
+ // - converting a quiet NaN from double to float is done by keeping
+ // the topmost 23 bits of the mantissa and discarding the lower
+ // ones
+ //
+ // - if the input is a signalling NaN, its top mantissa bit is set
+ // to turn it quiet, and then that quiet NaN is converted to
+ // float as above
+ status |= test__truncdfsf2(0x7ff0000000000001, 0x7fc00000);
+ status |= test__truncdfsf2(0x7ff753b1887bcf03, 0x7ffa9d8c);
+ status |= test__truncdfsf2(0x7ff911d3c0abfdda, 0x7fc88e9e);
+ status |= test__truncdfsf2(0xfff0000000000001, 0xffc00000);
+ status |= test__truncdfsf2(0xfff753b1887bcf03, 0xfffa9d8c);
+ status |= test__truncdfsf2(0xfff911d3c0abfdda, 0xffc88e9e);
+
+#endif // ARM_NAN_HANDLING
+
+ return status;
+}
More information about the llvm-branch-commits
mailing list