[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized FP double <-> single conversion (PR #179926)

Thu Feb 5 04:03:34 PST 2026

https://github.com/statham-arm created https://github.com/llvm/llvm-project/pull/179926

This commit provides assembly versions of the conversions both ways between double and float.

>From 35dd800cbe1eb4d571c47254530ee75e7b98f500 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Thu, 29 Jan 2026 16:12:53 +0000
Subject: [PATCH] [compiler-rt][ARM] Optimized FP double <-> single conversion

This commit provides assembly versions of the conversions both ways
between double and float.
---
 compiler-rt/lib/builtins/CMakeLists.txt       |   2 +
 compiler-rt/lib/builtins/arm/extendsfdf2.S    | 195 ++++++++++
 compiler-rt/lib/builtins/arm/truncdfsf2.S     | 198 ++++++++++
 .../test/builtins/Unit/extendsfdf2new_test.c  | 123 ++++++
 .../test/builtins/Unit/truncdfsf2new_test.c   | 367 ++++++++++++++++++
 5 files changed, 885 insertions(+)
 create mode 100644 compiler-rt/lib/builtins/arm/extendsfdf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/truncdfsf2.S
 create mode 100644 compiler-rt/test/builtins/Unit/extendsfdf2new_test.c
 create mode 100644 compiler-rt/test/builtins/Unit/truncdfsf2new_test.c

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 0c53781a51392..6b392c8eb22f0 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -456,6 +456,8 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
       arm/gesf2.S
       arm/unorddf2.S
       arm/unordsf2.S
+      arm/extendsfdf2.S
+      arm/truncdfsf2.S
       )
     set_source_files_properties(${assembly_files}
       PROPERTIES COMPILE_OPTIONS ${implicit_it_flag})
diff --git a/compiler-rt/lib/builtins/arm/extendsfdf2.S b/compiler-rt/lib/builtins/arm/extendsfdf2.S
new file mode 100644
index 0000000000000..21518d4a75b1a
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/extendsfdf2.S
@@ -0,0 +1,195 @@
+//===-- extendsfdf2.S - single- to double-precision FP conversion ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the __extendsfdf2 function (single to double precision
+// floating point conversion) for the Arm and Thumb2 ISAs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+#include "endian.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__extendsfdf2)
+  push {r4, lr}                 // save lr; r4 presumably just pads to 8 bytes
+  vmov r0, s0                   // move the float argument from s0 into r0
+  bl __aeabi_f2d                // core-register conversion, defined below
+  VMOV_TO_DOUBLE(d0, r0, r1)
+  pop {r4, pc}                  // NOTE(review): no END_COMPILERRT_FUNCTION here
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__extendsfdf2, __aeabi_f2d)
+#endif
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_f2d)
+
+  // Start with the fast path, dealing with normalized single-precision inputs.
+  // We handle these as quickly as possible in straight-line code, and branch
+  // out of line to a single 'handle everything else' label which will have to
+  // figure out what kind of unusual thing has happened.
+
+  // Extend the exponent field by 3 bits, by shifting the sign bit off the top
+  // of r0 into the carry flag, shifting the rest of the input word right by 3,
+  // then using RRX to put the sign back. So we end up with a word shaped like
+  // the top half of a double, but the exponent field is still biased by the
+  // single-precision offset of 0x7f instead of the double-precision 0x3ff.
+  lsls    r3, r0, #1            // r3 = input << 1; C = input sign bit
+  lsr     r12, r3, #3           // sign gone, rest moved down by 2; C kept
+  rrx     r12, r12              // final shift down; sign re-enters from C
+
+  // For a normalized number, the remaining steps are to rebias the exponent,
+  // recover the remaining 3 mantissa bits from r0 which aren't included in the
+  // word we've just made, and move both into the right output registers.
+  //
+  // But we must also check for the difficult cases. These occur when the input
+  // exponent is either 0 or 0xFF. Those two values can be identified by the
+  // property that exp XOR (exp << 1) has the top 7 bits all zero.
+
+  // Do the test for uncommon values. Instead of using a shifter operand in the
+  // obvious way (EOR output, r0, r0, lsl #1), we use the fact that the setup
+  // code above already has a shifted-left copy of the input word in r3. In
+  // Thumb, this makes the EORS a 16-bit instruction instead of 32-bit.
+  eors    r3, r3, r0            // r3 = input ^ (input << 1); EQ iff input == 0
+
+  // Now prepare the output, for normal inputs.
+  //
+  // We make this pair of instructions conditional on NE, i.e. we skip it if r3
+  // and r0 were actually equal (which could only happen if r0 was 0, i.e. the
+  // input was +0). This is fine, because in that situation the input wasn't
+  // normalized, so we aren't going to return this output anyway.
+  //
+  // The _point_ of conditionalizing these two instructions is that this way we
+  // have only one IT instruction on the fast path, and it's _here_, where this
+  // comment is, so that it comes immediately after the above 16-bit EORS and
+  // can be executed in the same cycle by Cortex-M3.
+  lslne   xl, r0, #29           // xl now has the bottom 3 input mantissa bits
+  addne   xh, r12, #(0x3ff - 0x7f) << 20 // rebias exponent in xh
+
+  // Finally, check whether the test word in r3 has its top 7 exponent bits
+  // zero. If not, we can return the fast-path answer.
+  tstne   r3, #0x7f000000       // NE if exponent bits are not all equal
+  bxne    lr                    // normal input: xh:xl is the answer
+
+  // Now we've handled the fast-path cases as fast as we know how, what do we
+  // do next? We almost certainly don't have the input value in r0 any more,
+  // because we overwrote it by writing an unused output to xh:xl in the above
+  // code. Worse, we didn't _reliably_ overwrite it, because those writes to
+  // xh:xl might not have happened if the whole test word in r3 was zero. So
+  // where can we find the input bits?
+  //
+  // We have r3 = input XOR (input << 1). That's actually an invertible
+  // transformation, so in principle we could recover the full original input
+  // float from just r3. The quickest way to do that involves these five
+  // instructions (in any order, since they commute):
+  //
+  //   EOR     r3, r3, r3, lsl #16
+  //   EOR     r3, r3, r3, lsl #8
+  //   EOR     r3, r3, r3, lsl #4
+  //   EOR     r3, r3, r3, lsl #2
+  //   EOR     r3, r3, r3, lsl #1
+  //
+  // But that's rather slow, and we can do better. r12 contains most of the
+  // input bits in a more usable form: we inserted three zero bits between the
+  // sign and the top of the exponent, but everything from the input is there
+  // _somewhere_, except for the low 3 bits.
+  //
+  // However, on one code path below we'll use a subset of those EOR
+  // instructions to recover the low 3 bits of the input.
+
+  // First, find out whether the input exponent was 0 (zero or denormal), or
+  // 0xFF (infinity or NaN). We know it was one of the two, or we would have
+  // taken the early return from the fast path. So it's enough to test any
+  // single bit of the exponent in r12.
+  tst     r12, #1<<27           // bit 27 is topmost bit of the 8-bit exponent
+  bne     LOCAL_LABEL(inf_or_nan)
+
+  // If we didn't take that branch, we have a denormal or zero. Zeroes are
+  // likely to be common, so we'd prefer to handle those with highest priority.
+  //
+  // r3 = (input XOR (input << 1)) will take the values 0 or 0x80000000 for a
+  // zero input. So it contains precisely the right value to return in xh.
+  //
+  // The BICS here combines the zeroing of xl with the test of r3, because it
+  // sets Z if and only if the input was one of those two values, and if so,
+  // sets xl=0.
+  //
+  // Unfortunately this has the side effect of clobbering xl in the case where
+  // we _don't_ take the early return, so now we've lost our verbatim copy of
+  // the low 3 input bits! On the denormal-handling path we'll have to recover
+  // those from r3 more awkwardly. But denormal handling is rare, and slow
+  // anyway, so it's worth the awkwardness to save a cycle in the much more
+  // common case of a zero input.
+  bics    xl, r3, #0x80000000   // EQ if output is zero
+  moveq   xh, r3                // if so, copy input sign into xh
+  bxeq    lr                    // and return
+
+  // Now we know we're dealing with a denormal, so we need to recover the whole
+  // input mantissa. Most of it is in r12, but those last three bits now need
+  // to be reconstructed from r3 by using part of the shift+EOR trick shown
+  // above. We only need the left shifts by 1 and by 2, because the other three
+  // don't affect the bottom 3 bits at all.
+  eor     r3, r3, r3, lsl #2
+  eor     r3, r3, r3, lsl #1
+  and     r3, r3, #7            // keep just the recovered low 3 bits
+
+  // Now r3 contains just the low bits of the mantissa. The rest of the
+  // mantissa is in r12, shifted right by 3 bits, so this instruction rebuilds
+  // the entire input mantissa in xh. (The exponent field is known to be zero,
+  // and the sign bit at the top of r12 is discarded by the left shift.)
+  orr     xh, r3, r12, lsl #3
+
+  // Renormalize that input mantissa so that its high bit is at the top of the
+  // word.
+  clz     r2, xh                // r2 = leading zeros above the mantissa
+  lsl     xh, xh, r2            // leading 1 of the mantissa now at bit 31
+
+  // Compute the right sign + exponent to go with that mantissa.
+  //
+  // If the input mantissa had had only its low bit set, then the input float
+  // would be 2^-149, which has a double-precision exponent of 0x36a. In that
+  // situation we'd have r2 = 31 (output from the CLZ). So we need the output
+  // exponent to be (0x389 - r2). But the leading bit of the mantissa will
+  // increment the exponent field when we add them together, so in fact we want
+  // to calculate (0x388 - r2). That's particularly convenient, because 0x388
+  // fits in an AArch32 immediate field!
+  and     r3, r12, #0x80000000  // get the sign bit from the top of r12
+  add     r3, r3, #0x388 << 20  // add the exponent bias as calculated above
+  sub     r3, r3, r2, lsl #20   // subtract the CLZ output
+
+  // Finally, distribute the normalized mantissa across the two output words,
+  // and combine the top half with the exponent we just computed.
+  lsls    xl, xh, #21           // low word = low 3 bits of normalized mantissa
+  add     xh, r3, xh, lsr #11   // high word = sign + exp + rest of mantissa
+  bx      lr
+
+LOCAL_LABEL(inf_or_nan):
+  // We come here if the input was either infinity or a NaN. In this situation
+  // we can be sure that the instructions that set up the fast-path return
+  // value _did_ happen, because the input was nonzero. Also we branched away
+  // before the test for a zero input clobbered xl.
+  //
+  // So xh:xl will contain what _would_ be the right output value if 0xFF were
+  // not a special input: the exponent field will be 0x47f, and the sign and
+  // mantissa will be in place.
+  //
+  // This is almost exactly what we really want to return, except for two
+  // things: the exponent should be corrected to 0x7ff for an output infinity
+  // or NaN, and if the mantissa is nonzero at all (so that we're returning a
+  // NaN and not an infinity) then we should set its top bit to make it a quiet
+  // NaN.
+  orrs    xh, xh, #0x7f000000   // set the missing bits in the exponent field
+  orrs    r2, xl, xh, lsl #12   // is any bit of the mantissa set?
+  orrne   xh, xh, #0x00080000   // if so, set the top mantissa bit
+  bx      lr
+
+END_COMPILERRT_FUNCTION(__aeabi_f2d)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/truncdfsf2.S b/compiler-rt/lib/builtins/arm/truncdfsf2.S
new file mode 100644
index 0000000000000..d87fce8d1bcbb
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/truncdfsf2.S
@@ -0,0 +1,198 @@
+//===-- truncdfsf2.S - double- to single precision FP conversion ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the __truncdfsf2 function (double to single precision
+// floating point conversion), with the IEEE-754 default rounding (to nearest,
+// ties to even), for the Arm and Thumb2 ISAs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+#include "endian.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__truncdfsf2)
+  push {r4, lr}                 // save lr; r4 presumably just pads to 8 bytes
+  VMOV_FROM_DOUBLE(r0, r1, d0)
+  bl __aeabi_d2f                // core-register conversion, defined below
+  vmov s0, r0                   // move the float result from r0 into s0
+  pop {r4, pc}                  // NOTE(review): no END_COMPILERRT_FUNCTION here
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__truncdfsf2, __aeabi_d2f)
+#endif
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_d2f)
+
+  // Start with the fast path, dealing with input values that give a normalized
+  // single-precision output. We handle these as quickly as possible in
+  // straight-line code, and branch out of line to a single 'handle everything
+  // else' label which will have to figure out what kind of unusual thing has
+  // happened.
+
+  // Split xh into the sign bit (in r3) and everything else (r2), so that we
+  // can change the width of the exponent field and then put the sign back on
+  // later.
+  bic     r2, xh, #0x80000000   // r2 = exponent + top of mantissa
+  and     r3, xh, #0x80000000   // r3 = sign bit alone
+
+  // Rebias the exponent, still in its double-precision location, to account
+  // for the difference between double- and single-precision exponents.
+  sub     r2, r2, #(0x3ff-0x7f) << 20   // r2 may now be negative
+
+  // If the exponent field is now 0 or less, we have an underflow or an exact
+  // zero. If it's 0xFF or more, we have an overflow, or a NaN or infinity as
+  // input. Detect all of those in a combined test, and branch out of line.
+  cmp     r2, #0x00100000       // LO if output too small
+  rsbshs  r12, r2, #0x0ff00000  // otherwise, set LS if output too large
+  bls     LOCAL_LABEL(uncommon)          // so now LS means one or the other happened
+
+  // We've disposed of all the uncommon cases, so we know we're returning a
+  // normalized float, but we might still need to round it. Shift the round bit
+  // into the C flag, also setting Z if everything below that is zero.
+  lsls    r12, xl, #4
+
+  // Put the result back together, by recombining the sign (in r3) with the
+  // exponent and top of the mantissa (in r2, needing to be shifted left 3
+  // bits), plus the top 3 bits of xl. The last of those is put on with an ADC
+  // instruction, which also rounds up if the bit we just shifted into C was
+  // set.
+  orr     r2, r3, r2, lsl #3    // sign + exponent + most of mantissa
+  adc     r0, r2, xl, lsr #29   // low 3 bits of mantissa + maybe round up
+
+  // If C=1 and Z=1, we need to round to even. Otherwise we're finished. So we
+  // conditionally return based on one of those flags, then clear the low
+  // output bit based on the other.
+  //
+  // Which way round? On the assumption that input mantissas are roughly
+  // uniformly distributed, _almost all_ input doubles will contain a 1 bit
+  // somewhere in the bottom 28 bits, so we return early in the vast majority
+  // of cases by testing Z first. If we tested C first, we'd expect to return
+  // early only half the time, costing two extra instructions half the time
+  // instead of 1/2^28 of the time.
+  //
+  // (That's a bit optimistic, because of course in some situations input
+  // mantissas _won't_ be that uniform. In particular, if you converted from a
+  // float, did a small amount of calculation in double, and converted back,
+  // the round-to-even case might come up more often. But at least _some_
+  // applications will be passing doubles that make use of the whole mantissa,
+  // so I think this is still the more sensible way round to do the test.)
+  bxne    lr                    // return if Z=0
+  biccs   r0, r0, #1            // Z=1, so round to even if C=1 too
+  bx      lr                    // and now return unconditionally
+
+LOCAL_LABEL(uncommon):
+  // We come here if anything at all goes wrong on the fast path. We could have
+  // an interesting kind of input - zero, denormal, infinity or NaN - or we
+  // could have a normalized double-precision input too large or too small to
+  // yield a normalized single-precision output.
+  //
+  // Of the various cases, the most important one to handle quickly is a zero
+  // input, because those are probably fairly common. So the very first thing
+  // we do is test if the input is zero, and if so, return the same sign of
+  // zero by simply using xh as the return value.
+  orrs    r12, xl, xh, lsl #1   // are all bits of xh:xl 0 except the sign bit?
+
+#ifndef __BIG_ENDIAN__
+  // In little-endian, xh (containing the desired sign bit) and r0 (the output
+  // register) aren't the same. This instruction can be skipped in big-endian,
+  // where the correct output value is already in r0.
+  moveq   r0, xh
+#endif
+  bxeq    lr
+
+  // Separate the remaining cases into three types: too small (underflow,
+  // whether or not the input was a denormal), too big (overflow or input
+  // infinity, which we treat the same in the absence of FP exceptions), and
+  // NaN.
+  //
+  // At this stage r2 contains the output exponent, rebiased to its
+  // single-precision value, but at bit 20 (that is, still in the
+  // double-precision position). Detect underflow by doing a signed comparison
+  // against the minimum normalized single-precision exponent.
+  cmp     r2, #0x00100000
+  blt     LOCAL_LABEL(underflow)
+
+  // Now figure out whether we had a NaN as input, by shifting xh left by a bit
+  // (discarding the sign) and setting the new low bit if xl != 0. This gives a
+  // value which is greater than 0xFFE00000 (in an unsigned comparison) for
+  // precisely NaN inputs.
+  cmp     xl, #1                // set C if xl != 0
+  adc     r12, xh, xh           // shift that in to the bottom of xh
+  cmn     r12, #0x00200000      // is the result > 0xFFE00000?
+  bhi     LOCAL_LABEL(nan)               // if so, go and handle a NaN
+
+  // If we're still here, we have a finite overflow, or an input infinity. We
+  // don't have to figure out which: we return an infinity of the appropriate
+  // sign in both cases. So keep just the sign of xh, and make an infinity out
+  // of the rest of the bits.
+  mvn     r0, xh, lsr #31       // shift sign bit down to bit 0 and flip it
+  mvn     r0, r0, lsl #8        // flip it back, putting 8 set bits below it
+  lsl     r0, r0, #23           // and shift those 9 bits back up to the top
+  bx      lr
+
+LOCAL_LABEL(nan):
+  // We have a double-precision NaN input. The Arm NaN handling rules say that
+  // we make the output single-precision NaN by keeping the sign and as much of
+  // the mantissa as possible (starting from the top bit). But we also set the
+  // top bit of the mantissa, which makes the output NaN quiet even if the
+  // input one was signaling.
+  //
+  // So this code looks a bit like a miniature version of the fast path: we
+  // keep the bottom 8 bits of the exponent in xh as the output exponent (we
+  // know it's all 1s, which is what we want), plus all the mantissa bits below
+  // it; shift all of that 3 bits left and recombine with the sign; then
+  // combine with the top 3 bits of xl. Finally, set the top mantissa bit.
+  bic     r2, xh, #0xF0000000   // everything from xh we want to shift left
+  orr     r0, r3, xl, lsr #29   // combine sign with low 3 output mantissa bits
+  orr     r0, r0, r2, lsl #3    // combine that with the shifted-up value in r2
+  orr     r0, r0, #0x00400000   // set the top mantissa bit to make it a QNaN
+  bx      lr
+
+LOCAL_LABEL(underflow):
+  // We have an input value small enough to underflow. The basic strategy is to
+  // leave __funder to deal with the details.
+  //
+  // Normally __funder expects to get a value that's already been rounded, and
+  // will re-round it, for which it also needs to know which way the value has
+  // been rounded already. In this case we haven't rounded _yet_. Rather than
+  // carefully rounding to nearest, it's easier to just make the __funder input
+  // value by truncating the mantissa (i.e. round towards zero), and set the
+  // rounding direction accordingly.
+
+  // Rebias the exponent (again) to make an IEEE 754 underflow intermediate. If
+  // this still doesn't make r2 positive, then the result is so small that it
+  // will underflow to 0 anyway, so it doesn't really matter what exponent we
+  // do provide - we just clear the top 8 bits of r2 to ensure the sign is
+  // right and the exponent is _something_ small.
+  adds    r2, r2, #0x0c000000   // exponent bias (still shifted down 3 bits)
+  bicmi   r2, r2, #0xff000000   // handle exponent still being negative
+
+  // Test the bits we're going to shift off the mantissa, to see if any are
+  // nonzero. This will determine the rounding direction we pass to __funder,
+  // because although we never round _up_ on this path, we must still tell it
+  // whether the value we pass it was rounded down or was already exact.
+  lsls    r12, xl, #3           // set Z if the intermediate value is exact
+
+  // Put together the intermediate value to pass to __funder.
+  orr     r2, r3, r2, lsl #3    // sign + exponent + most of mantissa
+  orr     r0, r2, xl, lsr #29   // combine with top 3 bits of xl
+
+  // Set the rounding direction flag based on the test above.
+  moveq   r1, #0                // intermediate is exact
+  movne   r1, #1                // intermediate is too small (we didn't round)
+
+  // And tailcall __funder (full name __compiler_rt_funder) to finish the job.
+  b       SYMBOL_NAME(__compiler_rt_funder)
+
+END_COMPILERRT_FUNCTION(__aeabi_d2f)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c b/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c
new file mode 100644
index 0000000000000..04446488f73bf
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/extendsfdf2new_test.c
@@ -0,0 +1,123 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_extendsfdf2
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+// By default this test uses compareResultD to check the returned doubles, which
+// accepts any returned NaN if the expected result is the canonical NaN value
+// 0x7ff8000000000000. For the Arm optimized FP implementation, which commits
+// to a more detailed handling of NaNs, we tighten up the check and include
+// some extra test cases specific to that NaN policy.
+#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP
+#  define EXPECT_EXACT_RESULTS
+#  define ARM_NAN_HANDLING
+#endif
+
+// Returns: a converted from float to double
+COMPILER_RT_ABI double __extendsfdf2(float a);
+
+int test__extendsfdf2(int line, uint32_t a_rep, uint64_t expected_rep) {
+  float a = fromRep32(a_rep); // input is supplied as a raw 32-bit pattern
+  double x = __extendsfdf2(a);
+#ifdef EXPECT_EXACT_RESULTS
+  int ret = toRep64(x) != expected_rep; // strict: bit patterns must match
+#else
+  int ret = compareResultD(x, expected_rep); // lenient about NaN encodings
+#endif
+
+  if (ret) {
+    printf("error at line %d: __extendsfdf2(%08" PRIx32 ") = %016" PRIx64
+           ", expected %016" PRIx64 "\n",
+           line, a_rep, toRep64(x), expected_rep);
+  }
+  return ret; // nonzero means this case failed
+}
+
+#define test__extendsfdf2(a,x) test__extendsfdf2(__LINE__,a,x)
+
+int main(void) {
+  int status = 0;
+
+  status |= test__extendsfdf2(0x00000001, 0x36a0000000000000);
+  status |= test__extendsfdf2(0x00000003, 0x36b8000000000000);
+  status |= test__extendsfdf2(0x00000005, 0x36c4000000000000);
+  status |= test__extendsfdf2(0x00000009, 0x36d2000000000000);
+  status |= test__extendsfdf2(0x00000011, 0x36e1000000000000);
+  status |= test__extendsfdf2(0x00000021, 0x36f0800000000000);
+  status |= test__extendsfdf2(0x00000041, 0x3700400000000000);
+  status |= test__extendsfdf2(0x00000081, 0x3710200000000000);
+  status |= test__extendsfdf2(0x00000101, 0x3720100000000000);
+  status |= test__extendsfdf2(0x00000201, 0x3730080000000000);
+  status |= test__extendsfdf2(0x00000401, 0x3740040000000000);
+  status |= test__extendsfdf2(0x00000801, 0x3750020000000000);
+  status |= test__extendsfdf2(0x00001001, 0x3760010000000000);
+  status |= test__extendsfdf2(0x00002001, 0x3770008000000000);
+  status |= test__extendsfdf2(0x00004001, 0x3780004000000000);
+  status |= test__extendsfdf2(0x00008001, 0x3790002000000000);
+  status |= test__extendsfdf2(0x00010001, 0x37a0001000000000);
+  status |= test__extendsfdf2(0x00020001, 0x37b0000800000000);
+  status |= test__extendsfdf2(0x00040001, 0x37c0000400000000);
+  status |= test__extendsfdf2(0x00080001, 0x37d0000200000000);
+  status |= test__extendsfdf2(0x00100001, 0x37e0000100000000);
+  status |= test__extendsfdf2(0x00200001, 0x37f0000080000000);
+  status |= test__extendsfdf2(0x00400001, 0x3800000040000000);
+  status |= test__extendsfdf2(0x00800001, 0x3810000020000000);
+  status |= test__extendsfdf2(0x01000001, 0x3820000020000000);
+  status |= test__extendsfdf2(0x20000001, 0x3c00000020000000);
+  status |= test__extendsfdf2(0x30000001, 0x3e00000020000000);
+  status |= test__extendsfdf2(0x3f800000, 0x3ff0000000000000);
+  status |= test__extendsfdf2(0x7f000000, 0x47e0000000000000);
+  status |= test__extendsfdf2(0x7f7fffff, 0x47efffffe0000000);
+  status |= test__extendsfdf2(0x7f800000, 0x7ff0000000000000);
+  status |= test__extendsfdf2(0xff000000, 0xc7e0000000000000);
+  status |= test__extendsfdf2(0xff7fffff, 0xc7efffffe0000000);
+  status |= test__extendsfdf2(0xff800000, 0xfff0000000000000);
+  status |= test__extendsfdf2(0x80800000, 0xb810000000000000);
+  status |= test__extendsfdf2(0x807fffff, 0xb80fffffc0000000);
+  status |= test__extendsfdf2(0x80400000, 0xb800000000000000);
+  status |= test__extendsfdf2(0x803fffff, 0xb7ffffff80000000);
+  status |= test__extendsfdf2(0x80000003, 0xb6b8000000000000);
+  status |= test__extendsfdf2(0x80000002, 0xb6b0000000000000);
+  status |= test__extendsfdf2(0x80000001, 0xb6a0000000000000);
+  status |= test__extendsfdf2(0x80000000, 0x8000000000000000);
+
+  // Test that the result of an operation is a NaN at all when it should be.
+  //
+  // In most configurations these tests' results are checked using
+  // compareResultD, so we set all the answers to the canonical NaN
+  // 0x7ff8000000000000, which causes compareResultD to accept any NaN
+  // encoding. We also use the canonical float NaN 0x7fc00000 as the input,
+  // so that even in EXPECT_EXACT_RESULTS mode these tests should pass,
+  // because 0x7ff8000000000000 is still the exact expected NaN.
+  status |= test__extendsfdf2(0x7fc00000, 0x7ff8000000000000);
+
+#ifdef ARM_NAN_HANDLING
+  // Tests specific to the NaN handling of Arm hardware, mimicked by
+  // arm/extendsfdf2.S:
+  //
+  //  - a quiet NaN is distinguished by the top mantissa bit being 1
+  //
+  //  - converting a quiet NaN from float to double is done by copying
+  //    the input mantissa bits to the top of the output mantissa and
+  //    appending 0 bits below them
+  //
+  //  - if the input is a signalling NaN, its top mantissa bit is set
+  //    to turn it quiet, and then that quiet NaN is converted to
+  //    double as above
+  status |= test__extendsfdf2(0x7faf53b1, 0x7ffdea7620000000);
+  status |= test__extendsfdf2(0x7fe111d3, 0x7ffc223a60000000);
+  status |= test__extendsfdf2(0xffaf53b1, 0xfffdea7620000000);
+  status |= test__extendsfdf2(0xffe111d3, 0xfffc223a60000000);
+
+#endif // ARM_NAN_HANDLING
+
+  return status;
+}
diff --git a/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c b/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c
new file mode 100644
index 0000000000000..0542f97643618
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/truncdfsf2new_test.c
@@ -0,0 +1,367 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_truncdfsf2
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+// By default this test uses compareResultF to check the returned floats, which
+// accepts any returned NaN if the expected result is the canonical NaN value
+// 0x7fc00000. For the Arm optimized FP implementation, which commits to a more
+// detailed handling of NaNs, we tighten up the check and include some extra
+// test cases specific to that NaN policy.
+#if (__arm__ && !(__thumb__ && !__thumb2__)) && COMPILER_RT_ARM_OPTIMIZED_FP
+#  define EXPECT_EXACT_RESULTS // compare result bit patterns exactly
+#  define ARM_NAN_HANDLING     // also run the Arm-specific NaN test cases
+#endif // Arm (but not Thumb without Thumb-2) with the optimized FP routines
+
+// Function under test. Returns: a converted from double to float
+COMPILER_RT_ABI float __truncdfsf2(double a);
+
+// Converts the double with bits a_rep and checks the float result against
+// expected_rep, printing a message when the check fails. Returns nonzero on
+// failure. `line` is the caller's line number, supplied by the macro below.
+int test__truncdfsf2(int line, uint64_t a_rep, uint32_t expected_rep) {
+  float result = __truncdfsf2(fromRep64(a_rep));
+#ifndef EXPECT_EXACT_RESULTS
+  int failed = compareResultF(result, expected_rep);
+#else
+  int failed = toRep32(result) != expected_rep;
+#endif
+  if (failed)
+    printf("error at line %d: __truncdfsf2(%016" PRIx64 ") = %08" PRIx32
+           ", expected %08" PRIx32 "\n",
+           line, a_rep, toRep32(result), expected_rep);
+  return failed;
+}
+
+#define test__truncdfsf2(a,x) test__truncdfsf2(__LINE__,a,x)
+
+int main(void) {
+  int status = 0; // nonzero once any case fails; returned as exit status
+
+  status |= test__truncdfsf2(0x0000000000000001, 0x00000000); // underflow to +0
+  status |= test__truncdfsf2(0x0000000000000002, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000004, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000008, 0x00000000);
+  status |= test__truncdfsf2(0x000000000000001a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000020, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000040, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000080, 0x00000000);
+  status |= test__truncdfsf2(0x000000000000019a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000200, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000400, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000000800, 0x00000000);
+  status |= test__truncdfsf2(0x000000000000189a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000002000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000004000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000008000, 0x00000000);
+  status |= test__truncdfsf2(0x000000000001789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000020000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000040000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000080000, 0x00000000);
+  status |= test__truncdfsf2(0x000000000016789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000200000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000400000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000000800000, 0x00000000);
+  status |= test__truncdfsf2(0x000000000156789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000002000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000004000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000008000000, 0x00000000);
+  status |= test__truncdfsf2(0x000000001456789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000020000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000040000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000080000000, 0x00000000);
+  status |= test__truncdfsf2(0x000000013465789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000000200000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000400000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000000800000000, 0x00000000);
+  status |= test__truncdfsf2(0x000000123456789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000002000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000004000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000008000000000, 0x00000000);
+  status |= test__truncdfsf2(0x000001123456789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000020000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000040000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000080000000000, 0x00000000);
+  status |= test__truncdfsf2(0x000010123456789a, 0x00000000);
+  status |= test__truncdfsf2(0x0000200000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000400000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0000800000000000, 0x00000000);
+  status |= test__truncdfsf2(0x000100123456789a, 0x00000000);
+  status |= test__truncdfsf2(0x0002000000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0004000000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0008000000000000, 0x00000000);
+  status |= test__truncdfsf2(0x0010000000000000, 0x00000000); // DBL_MIN
+  status |= test__truncdfsf2(0x36a0000000000000, 0x00000001); // 2^-149
+  status |= test__truncdfsf2(0x36b0000000000000, 0x00000002);
+  status |= test__truncdfsf2(0x36b2000000000000, 0x00000002);
+  status |= test__truncdfsf2(0x36b4000000000000, 0x00000002); // tie -> even
+  status |= test__truncdfsf2(0x36b6000000000000, 0x00000003);
+  status |= test__truncdfsf2(0x36b8000000000000, 0x00000003);
+  status |= test__truncdfsf2(0x36ba000000000000, 0x00000003);
+  status |= test__truncdfsf2(0x36bc000000000000, 0x00000004); // tie -> even
+  status |= test__truncdfsf2(0x36be000000000000, 0x00000004);
+  status |= test__truncdfsf2(0x36c0000000000000, 0x00000004);
+  status |= test__truncdfsf2(0x36c1000000000000, 0x00000004);
+  status |= test__truncdfsf2(0x36c2000000000000, 0x00000004);
+  status |= test__truncdfsf2(0x36c3000000000000, 0x00000005);
+  status |= test__truncdfsf2(0x36c4000000000000, 0x00000005);
+  status |= test__truncdfsf2(0x36c5000000000000, 0x00000005);
+  status |= test__truncdfsf2(0x36c6000000000000, 0x00000006);
+  status |= test__truncdfsf2(0x36c7000000000000, 0x00000006);
+  status |= test__truncdfsf2(0x36d0000000000000, 0x00000008);
+  status |= test__truncdfsf2(0x36d0800000000000, 0x00000008);
+  status |= test__truncdfsf2(0x36d1000000000000, 0x00000008);
+  status |= test__truncdfsf2(0x36d1800000000000, 0x00000009);
+  status |= test__truncdfsf2(0x36d2000000000000, 0x00000009);
+  status |= test__truncdfsf2(0x36d2800000000000, 0x00000009);
+  status |= test__truncdfsf2(0x36d3000000000000, 0x0000000a);
+  status |= test__truncdfsf2(0x36d3800000000000, 0x0000000a);
+  status |= test__truncdfsf2(0x36e0000000000000, 0x00000010);
+  status |= test__truncdfsf2(0x36e0400000000000, 0x00000010);
+  status |= test__truncdfsf2(0x36e0800000000000, 0x00000010);
+  status |= test__truncdfsf2(0x36e0c00000000000, 0x00000011);
+  status |= test__truncdfsf2(0x36e1000000000000, 0x00000011);
+  status |= test__truncdfsf2(0x36e1400000000000, 0x00000011);
+  status |= test__truncdfsf2(0x36e1800000000000, 0x00000012);
+  status |= test__truncdfsf2(0x36e1c00000000000, 0x00000012);
+  status |= test__truncdfsf2(0x36f0000000000000, 0x00000020);
+  status |= test__truncdfsf2(0x36f0200000000000, 0x00000020);
+  status |= test__truncdfsf2(0x36f0400000000000, 0x00000020);
+  status |= test__truncdfsf2(0x36f0600000000000, 0x00000021);
+  status |= test__truncdfsf2(0x36f0800000000000, 0x00000021);
+  status |= test__truncdfsf2(0x36f0a00000000000, 0x00000021);
+  status |= test__truncdfsf2(0x36f0c00000000000, 0x00000022);
+  status |= test__truncdfsf2(0x36f0e00000000000, 0x00000022);
+  status |= test__truncdfsf2(0x3700000000000000, 0x00000040);
+  status |= test__truncdfsf2(0x3700100000000000, 0x00000040);
+  status |= test__truncdfsf2(0x3700200000000000, 0x00000040);
+  status |= test__truncdfsf2(0x3700300000000000, 0x00000041);
+  status |= test__truncdfsf2(0x3700400000000000, 0x00000041);
+  status |= test__truncdfsf2(0x3700500000000000, 0x00000041);
+  status |= test__truncdfsf2(0x3700600000000000, 0x00000042);
+  status |= test__truncdfsf2(0x3700700000000000, 0x00000042);
+  status |= test__truncdfsf2(0x3710000000000000, 0x00000080);
+  status |= test__truncdfsf2(0x3710080000000000, 0x00000080);
+  status |= test__truncdfsf2(0x3710100000000000, 0x00000080);
+  status |= test__truncdfsf2(0x3710180000000000, 0x00000081);
+  status |= test__truncdfsf2(0x3710200000000000, 0x00000081);
+  status |= test__truncdfsf2(0x3710280000000000, 0x00000081);
+  status |= test__truncdfsf2(0x3710300000000000, 0x00000082);
+  status |= test__truncdfsf2(0x3710380000000000, 0x00000082);
+  status |= test__truncdfsf2(0x3720000000000000, 0x00000100);
+  status |= test__truncdfsf2(0x3720040000000000, 0x00000100);
+  status |= test__truncdfsf2(0x3720080000000000, 0x00000100);
+  status |= test__truncdfsf2(0x37200c0000000000, 0x00000101);
+  status |= test__truncdfsf2(0x3720100000000000, 0x00000101);
+  status |= test__truncdfsf2(0x3720140000000000, 0x00000101);
+  status |= test__truncdfsf2(0x3720180000000000, 0x00000102);
+  status |= test__truncdfsf2(0x37201c0000000000, 0x00000102);
+  status |= test__truncdfsf2(0x3730000000000000, 0x00000200);
+  status |= test__truncdfsf2(0x3730020000000000, 0x00000200);
+  status |= test__truncdfsf2(0x3730040000000000, 0x00000200);
+  status |= test__truncdfsf2(0x3730060000000000, 0x00000201);
+  status |= test__truncdfsf2(0x3730080000000000, 0x00000201);
+  status |= test__truncdfsf2(0x37300a0000000000, 0x00000201);
+  status |= test__truncdfsf2(0x37300c0000000000, 0x00000202);
+  status |= test__truncdfsf2(0x37300e0000000000, 0x00000202);
+  status |= test__truncdfsf2(0x3740000000000000, 0x00000400);
+  status |= test__truncdfsf2(0x3740010000000000, 0x00000400);
+  status |= test__truncdfsf2(0x3740020000000000, 0x00000400);
+  status |= test__truncdfsf2(0x3740030000000000, 0x00000401);
+  status |= test__truncdfsf2(0x3740040000000000, 0x00000401);
+  status |= test__truncdfsf2(0x3740050000000000, 0x00000401);
+  status |= test__truncdfsf2(0x3740060000000000, 0x00000402);
+  status |= test__truncdfsf2(0x3740070000000000, 0x00000402);
+  status |= test__truncdfsf2(0x3750000000000000, 0x00000800);
+  status |= test__truncdfsf2(0x3750008000000000, 0x00000800);
+  status |= test__truncdfsf2(0x3750010000000000, 0x00000800);
+  status |= test__truncdfsf2(0x3750018000000000, 0x00000801);
+  status |= test__truncdfsf2(0x3750020000000000, 0x00000801);
+  status |= test__truncdfsf2(0x3750028000000000, 0x00000801);
+  status |= test__truncdfsf2(0x3750030000000000, 0x00000802);
+  status |= test__truncdfsf2(0x3750038000000000, 0x00000802);
+  status |= test__truncdfsf2(0x3760000000000000, 0x00001000);
+  status |= test__truncdfsf2(0x3760004000000000, 0x00001000);
+  status |= test__truncdfsf2(0x3760008000000000, 0x00001000);
+  status |= test__truncdfsf2(0x376000c000000000, 0x00001001);
+  status |= test__truncdfsf2(0x3760010000000000, 0x00001001);
+  status |= test__truncdfsf2(0x3760014000000000, 0x00001001);
+  status |= test__truncdfsf2(0x3760018000000000, 0x00001002);
+  status |= test__truncdfsf2(0x376001c000000000, 0x00001002);
+  status |= test__truncdfsf2(0x3770000000000000, 0x00002000);
+  status |= test__truncdfsf2(0x3770002000000000, 0x00002000);
+  status |= test__truncdfsf2(0x3770004000000000, 0x00002000);
+  status |= test__truncdfsf2(0x3770006000000000, 0x00002001);
+  status |= test__truncdfsf2(0x3770008000000000, 0x00002001);
+  status |= test__truncdfsf2(0x377000a000000000, 0x00002001);
+  status |= test__truncdfsf2(0x377000c000000000, 0x00002002);
+  status |= test__truncdfsf2(0x377000e000000000, 0x00002002);
+  status |= test__truncdfsf2(0x3780000000000000, 0x00004000);
+  status |= test__truncdfsf2(0x3780001000000000, 0x00004000);
+  status |= test__truncdfsf2(0x3780002000000000, 0x00004000);
+  status |= test__truncdfsf2(0x3780003000000000, 0x00004001);
+  status |= test__truncdfsf2(0x3780004000000000, 0x00004001);
+  status |= test__truncdfsf2(0x3780005000000000, 0x00004001);
+  status |= test__truncdfsf2(0x3780006000000000, 0x00004002);
+  status |= test__truncdfsf2(0x3780007000000000, 0x00004002);
+  status |= test__truncdfsf2(0x3790000000000000, 0x00008000);
+  status |= test__truncdfsf2(0x3790000800000000, 0x00008000);
+  status |= test__truncdfsf2(0x3790001000000000, 0x00008000);
+  status |= test__truncdfsf2(0x3790001800000000, 0x00008001);
+  status |= test__truncdfsf2(0x3790002000000000, 0x00008001);
+  status |= test__truncdfsf2(0x3790002800000000, 0x00008001);
+  status |= test__truncdfsf2(0x3790003000000000, 0x00008002);
+  status |= test__truncdfsf2(0x3790003800000000, 0x00008002);
+  status |= test__truncdfsf2(0x37a0000000000000, 0x00010000);
+  status |= test__truncdfsf2(0x37a0000400000000, 0x00010000);
+  status |= test__truncdfsf2(0x37a0000800000000, 0x00010000);
+  status |= test__truncdfsf2(0x37a0000c00000000, 0x00010001);
+  status |= test__truncdfsf2(0x37a0001000000000, 0x00010001);
+  status |= test__truncdfsf2(0x37a0001400000000, 0x00010001);
+  status |= test__truncdfsf2(0x37a0001800000000, 0x00010002);
+  status |= test__truncdfsf2(0x37a0001c00000000, 0x00010002);
+  status |= test__truncdfsf2(0x37b0000000000000, 0x00020000);
+  status |= test__truncdfsf2(0x37b0000200000000, 0x00020000);
+  status |= test__truncdfsf2(0x37b0000400000000, 0x00020000);
+  status |= test__truncdfsf2(0x37b0000600000000, 0x00020001);
+  status |= test__truncdfsf2(0x37b0000800000000, 0x00020001);
+  status |= test__truncdfsf2(0x37b0000a00000000, 0x00020001);
+  status |= test__truncdfsf2(0x37b0000c00000000, 0x00020002);
+  status |= test__truncdfsf2(0x37b0000e00000000, 0x00020002);
+  status |= test__truncdfsf2(0x37c0000000000000, 0x00040000);
+  status |= test__truncdfsf2(0x37c0000100000000, 0x00040000);
+  status |= test__truncdfsf2(0x37c0000200000000, 0x00040000);
+  status |= test__truncdfsf2(0x37c0000300000000, 0x00040001);
+  status |= test__truncdfsf2(0x37c0000400000000, 0x00040001);
+  status |= test__truncdfsf2(0x37c0000500000000, 0x00040001);
+  status |= test__truncdfsf2(0x37c0000600000000, 0x00040002);
+  status |= test__truncdfsf2(0x37c0000700000000, 0x00040002);
+  status |= test__truncdfsf2(0x37d0000000000000, 0x00080000);
+  status |= test__truncdfsf2(0x37d0000080000000, 0x00080000);
+  status |= test__truncdfsf2(0x37d0000100000000, 0x00080000);
+  status |= test__truncdfsf2(0x37d0000180000000, 0x00080001);
+  status |= test__truncdfsf2(0x37d0000200000000, 0x00080001);
+  status |= test__truncdfsf2(0x37d0000280000000, 0x00080001);
+  status |= test__truncdfsf2(0x37d0000300000000, 0x00080002);
+  status |= test__truncdfsf2(0x37d0000380000000, 0x00080002);
+  status |= test__truncdfsf2(0x37e0000000000000, 0x00100000);
+  status |= test__truncdfsf2(0x37e0000040000000, 0x00100000);
+  status |= test__truncdfsf2(0x37e0000080000000, 0x00100000);
+  status |= test__truncdfsf2(0x37e00000c0000000, 0x00100001);
+  status |= test__truncdfsf2(0x37e0000100000000, 0x00100001);
+  status |= test__truncdfsf2(0x37e0000140000000, 0x00100001);
+  status |= test__truncdfsf2(0x37e0000180000000, 0x00100002);
+  status |= test__truncdfsf2(0x37e00001c0000000, 0x00100002);
+  status |= test__truncdfsf2(0x37f0000000000000, 0x00200000);
+  status |= test__truncdfsf2(0x37f0000020000000, 0x00200000);
+  status |= test__truncdfsf2(0x37f000003fffffff, 0x00200000);
+  status |= test__truncdfsf2(0x37f0000040000000, 0x00200000); // exact tie
+  status |= test__truncdfsf2(0x37f0000040000001, 0x00200001);
+  status |= test__truncdfsf2(0x37f0000060000000, 0x00200001);
+  status |= test__truncdfsf2(0x37f0000080000000, 0x00200001);
+  status |= test__truncdfsf2(0x37f00000a0000000, 0x00200001);
+  status |= test__truncdfsf2(0x37f00000bfffffff, 0x00200001);
+  status |= test__truncdfsf2(0x37f00000c0000000, 0x00200002);
+  status |= test__truncdfsf2(0x37f00000c0000001, 0x00200002);
+  status |= test__truncdfsf2(0x37f00000e0000000, 0x00200002);
+  status |= test__truncdfsf2(0x3800000000000000, 0x00400000);
+  status |= test__truncdfsf2(0x3800000010000000, 0x00400000);
+  status |= test__truncdfsf2(0x3800000020000000, 0x00400000);
+  status |= test__truncdfsf2(0x3800000030000000, 0x00400001);
+  status |= test__truncdfsf2(0x3800000040000000, 0x00400001);
+  status |= test__truncdfsf2(0x3800000050000000, 0x00400001);
+  status |= test__truncdfsf2(0x3800000060000000, 0x00400002);
+  status |= test__truncdfsf2(0x3800000070000000, 0x00400002);
+  status |= test__truncdfsf2(0x380fffffffffffff, 0x00800000); // -> FLT_MIN
+  status |= test__truncdfsf2(0x3810000000000000, 0x00800000); // FLT_MIN
+  status |= test__truncdfsf2(0x3810000008000000, 0x00800000);
+  status |= test__truncdfsf2(0x3810000010000000, 0x00800000);
+  status |= test__truncdfsf2(0x3810000018000000, 0x00800001);
+  status |= test__truncdfsf2(0x3810000020000000, 0x00800001);
+  status |= test__truncdfsf2(0x3810000028000000, 0x00800001);
+  status |= test__truncdfsf2(0x3810000030000000, 0x00800002);
+  status |= test__truncdfsf2(0x3810000038000000, 0x00800002);
+  status |= test__truncdfsf2(0x3ff0000000000000, 0x3f800000); // 1.0
+  status |= test__truncdfsf2(0x3ff0000008000000, 0x3f800000);
+  status |= test__truncdfsf2(0x3ff0000010000000, 0x3f800000); // tie -> even
+  status |= test__truncdfsf2(0x3ff0000018000000, 0x3f800001);
+  status |= test__truncdfsf2(0x3ff0000028000000, 0x3f800001);
+  status |= test__truncdfsf2(0x3ff0000030000000, 0x3f800002); // tie -> even
+  status |= test__truncdfsf2(0x3ff0000038000000, 0x3f800002);
+  status |= test__truncdfsf2(0x4000000000000000, 0x40000000);
+  status |= test__truncdfsf2(0x47efffffe8000000, 0x7f7fffff); // -> FLT_MAX
+  status |= test__truncdfsf2(0x47effffff0000000, 0x7f800000); // tie -> +inf
+  status |= test__truncdfsf2(0x47effffff8000000, 0x7f800000);
+  status |= test__truncdfsf2(0x7fc0000000000000, 0x7f800000); // finite, not NaN
+  status |= test__truncdfsf2(0x7ff0000000000000, 0x7f800000); // +inf
+  status |= test__truncdfsf2(0x8010000000000000, 0x80000000);
+  status |= test__truncdfsf2(0xbff0000008000000, 0xbf800000);
+  status |= test__truncdfsf2(0xbff0000010000000, 0xbf800000);
+  status |= test__truncdfsf2(0xbff0000018000000, 0xbf800001);
+  status |= test__truncdfsf2(0xbff0000028000000, 0xbf800001);
+  status |= test__truncdfsf2(0xbff0000030000000, 0xbf800002);
+  status |= test__truncdfsf2(0xbff0000038000000, 0xbf800002);
+  status |= test__truncdfsf2(0xc024000000000000, 0xc1200000);
+  status |= test__truncdfsf2(0xc7efffffe8000000, 0xff7fffff);
+  status |= test__truncdfsf2(0xc7effffff0000000, 0xff800000);
+  status |= test__truncdfsf2(0xc7effffff8000000, 0xff800000);
+  status |= test__truncdfsf2(0xffc0000000000000, 0xff800000); // finite, not NaN
+  status |= test__truncdfsf2(0xfff0000000000000, 0xff800000); // -inf
+  status |= test__truncdfsf2(0x3780000000000000, 0x00004000);
+  status |= test__truncdfsf2(0xb780000000000000, 0x80004000);
+  status |= test__truncdfsf2(0x0000000080000000, 0x00000000);
+  status |= test__truncdfsf2(0x8000000080000000, 0x80000000);
+  status |= test__truncdfsf2(0x380ffffff0000000, 0x00800000);
+  status |= test__truncdfsf2(0x380fffffd0000000, 0x007fffff);
+  status |= test__truncdfsf2(0x380fffffe8000000, 0x00800000);
+  status |= test__truncdfsf2(0x380fffffc8000000, 0x007fffff);
+  status |= test__truncdfsf2(0xb80ffffff0000000, 0x80800000);
+  status |= test__truncdfsf2(0xb80fffffd0000000, 0x807fffff);
+  status |= test__truncdfsf2(0xb80fffffe8000000, 0x80800000);
+  status |= test__truncdfsf2(0xb80fffffc8000000, 0x807fffff);
+  status |= test__truncdfsf2(0x0000000000000000, 0x00000000); // +0
+  status |= test__truncdfsf2(0x8000000000000000, 0x80000000); // -0
+  status |= test__truncdfsf2(0xc7e0000010000000, 0xff000000);
+
+  // Test that the result of an operation is a NaN at all when it should be.
+  //
+  // In most configurations these tests' results are checked using
+  // compareResultF, so we set all the answers to the canonical NaN 0x7fc00000,
+  // which causes compareResultF to accept any NaN encoding. We also use the
+  // same value as the input NaN in tests that have one, so that even in
+  // EXPECT_EXACT_RESULTS mode these tests should pass, because 0x7fc00000 is
+  // still the exact expected NaN.
+  status |= test__truncdfsf2(0x7ff8000000000000, 0x7fc00000);
+
+#ifdef ARM_NAN_HANDLING
+  // Tests specific to the NaN handling of Arm hardware, mimicked by
+  // arm/truncdfsf2.S:
+  //
+  //  - a quiet NaN is distinguished by the top mantissa bit being 1
+  //
+  //  - converting a quiet NaN from double to float is done by keeping
+  //    the topmost 23 bits of the mantissa and discarding the lower
+  //    ones
+  //
+  //  - if the input is a signalling NaN, its top mantissa bit is set
+  //    to turn it quiet, and then that quiet NaN is converted to
+  //    float as above
+  status |= test__truncdfsf2(0x7ff0000000000001, 0x7fc00000);
+  status |= test__truncdfsf2(0x7ff753b1887bcf03, 0x7ffa9d8c);
+  status |= test__truncdfsf2(0x7ff911d3c0abfdda, 0x7fc88e9e);
+  status |= test__truncdfsf2(0xfff0000000000001, 0xffc00000);
+  status |= test__truncdfsf2(0xfff753b1887bcf03, 0xfffa9d8c);
+  status |= test__truncdfsf2(0xfff911d3c0abfdda, 0xffc88e9e);
+
+#endif // ARM_NAN_HANDLING
+
+  return status;
+}