[llvm-branch-commits] [compiler-rt] [compiler-rt][ARM] Optimized single-precision FP comparisons (PR #179925)

Simon Tatham via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Mar 27 02:53:36 PDT 2026


https://github.com/statham-arm updated https://github.com/llvm/llvm-project/pull/179925

>From 017a0e3bac60a714ef6923eead584a91eebbf2a1 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Thu, 29 Jan 2026 16:10:11 +0000
Subject: [PATCH 1/7] [compiler-rt][ARM] Optimized single-precision FP
 comparisons

These comparison functions follow the same structure as the
double-precision ones in a prior commit, of a header file containing
the main logic and some entry points varying the construction of the
return value.

In this case, we have provided versions for Thumb1 as well as
Arm/Thumb2.
---
 compiler-rt/lib/builtins/CMakeLists.txt       |   9 +
 compiler-rt/lib/builtins/arm/cmpsf2.S         |  56 +++
 compiler-rt/lib/builtins/arm/fcmp.h           | 174 +++++++
 compiler-rt/lib/builtins/arm/gesf2.S          |  54 +++
 compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S  |  55 +++
 compiler-rt/lib/builtins/arm/thumb1/fcmp.h    | 191 ++++++++
 compiler-rt/lib/builtins/arm/thumb1/gesf2.S   |  54 +++
 .../lib/builtins/arm/thumb1/unordsf2.S        |  49 ++
 compiler-rt/lib/builtins/arm/unordsf2.S       |  56 +++
 .../test/builtins/Unit/comparesf2new_test.c   | 433 ++++++++++++++++++
 10 files changed, 1131 insertions(+)
 create mode 100644 compiler-rt/lib/builtins/arm/cmpsf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/fcmp.h
 create mode 100644 compiler-rt/lib/builtins/arm/gesf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/fcmp.h
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/gesf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/thumb1/unordsf2.S
 create mode 100644 compiler-rt/lib/builtins/arm/unordsf2.S
 create mode 100644 compiler-rt/test/builtins/Unit/comparesf2new_test.c

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 0e8b0fa553442..0c53781a51392 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -451,8 +451,11 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP AND BUILTIN_SUPPORTED_ARCH MATCHES "arm")
       arm/muldf3.S
       arm/divdf3.S
       arm/cmpdf2.S
+      arm/cmpsf2.S
       arm/gedf2.S
+      arm/gesf2.S
       arm/unorddf2.S
+      arm/unordsf2.S
       )
     set_source_files_properties(${assembly_files}
       PROPERTIES COMPILE_OPTIONS ${implicit_it_flag})
@@ -507,8 +510,11 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP)
   set(thumb1_base_SOURCES
     arm/thumb1/mulsf3.S
     arm/thumb1/cmpdf2.S
+    arm/thumb1/cmpsf2.S
     arm/thumb1/gedf2.S
+    arm/thumb1/gesf2.S
     arm/thumb1/unorddf2.S
+    arm/thumb1/unordsf2.S
     arm/fnan2.c
     arm/fnorm2.c
     arm/funder.c
@@ -516,6 +522,9 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP)
   )
   set_property(SOURCE arm/thumb1/cmpdf2.S PROPERTY crt_supersedes comparedf2.c)
   set_property(SOURCE arm/thumb1/cmpdf2.S DIRECTORY ${COMPILER_RT_SOURCE_DIR} PROPERTY crt_provides comparedf2)
+  set_property(SOURCE arm/thumb1/cmpsf2.S PROPERTY crt_supersedes comparesf2.S)
+  # We don't need to set 'crt_provides' for cmpsf2.S, because the
+  # superseded comparesf2.S will already have enabled the comparesf2 tests.
 endif()
 
 set(arm_EABI_RT_SOURCES
diff --git a/compiler-rt/lib/builtins/arm/cmpsf2.S b/compiler-rt/lib/builtins/arm/cmpsf2.S
new file mode 100644
index 0000000000000..14166246101af
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/cmpsf2.S
@@ -0,0 +1,56 @@
+//===-- cmpsf2.S - single-precision floating point comparison -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2: it's a three-way compare
+// which returns <0 if x<y, 0 if x==y, and >0 if x>y. If the result is
+// unordered (i.e. x or y or both is NaN) then it returns >0.
+//
+// This also makes it suitable for use as all of __eqsf2, __nesf2, __ltsf2 or
+// __lesf2.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  mov r0, #0
+  movhi r0, #1
+  movlo r0, #-1
+.endm
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__cmpsf2)
+  push {r4, lr}
+  vmov r0, s0
+  vmov r1, s1
+  bl __compiler_rt_softfp_cmpsf2
+  pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __compiler_rt_softfp_cmpsf2)
+#endif
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__eqsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __cmpsf2)
+
+DEFINE_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  mov r0, #+1
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/fcmp.h b/compiler-rt/lib/builtins/arm/fcmp.h
new file mode 100644
index 0000000000000..23bdd73a10c5b
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/fcmp.h
@@ -0,0 +1,174 @@
+//===-- fcmp.h - shared code for single-precision FP comparison functions -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This code is the skeleton of a single-precision FP compare, with two details
+// left out: which input value is in which register, and how to make the return
+// value. It allows the main comparison logic to be shared between (for
+// example) __lesf2 and __gesf2, varying only those details.
+//
+//===----------------------------------------------------------------------===//
+
+// How to use this header file:
+//
+// This header file is expected to be #included from inside a function
+// definition in a .S file. The source file including this header should
+// provide the following:
+//
+// op0 and op1: register aliases (via .req) for the registers containing the
+// input operands.
+//  - For most comparisons, op0 will correspond to r0 and op1 to r1.
+//  - But a function with the reversed semantics of __aeabi_cfrcmple will define
+//    them the other way round.
+//
+// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
+// an appropriate return value in r0, for the cases that do *not* involve NaN.
+//  - On entry to this macro, the condition codes LO, EQ and HI indicate that
+//    op0 < op1, op0 == op1 or op0 > op1 respectively.
+//  - For functions that return a result in the flags, this macro can be empty,
+//    because those are the correct flags to return anyway.
+//  - Functions that return a boolean in r0 should set it up by checking the
+//    flags.
+//
+// LOCAL_LABEL(NaN): a label defined within the compare function, after the
+// #include of this header. Called when at least one input is a NaN, and sets
+// up the appropriate return value for that case.
+
+// --------------------------------------------------
+// The actual entry point of the compare function.
+//
+// The basic plan is to start by ORing together the two inputs. This tells us
+// two things:
+//  - the top bit of the output tells us whether both inputs are positive, or
+//    whether at least one is negative
+//  - if the 8 exponent bits of the output are not all 1, then there are
+//    definitely no NaNs, so a fast path can handle most non-NaN cases.
+
+  // First diverge control for the negative-numbers case.
+  orrs    r12, op0, op1
+  bmi     LOCAL_LABEL(negative)         // high bit set => at least one negative input
+
+  // Here, both inputs are positive. Try adding 1<<23 to their bitwise OR in
+  // r12. This will carry all the way into the top bit, setting the N flag, if
+  // all 8 exponent bits were set.
+  cmn     r12, #1 << 23
+  bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
+
+  // The fastest fast path: both inputs positive and we could easily tell there
+  // were no NaNs. So we just compare op0 and op1 as unsigned integers.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_positive):
+  // Second tier for positive numbers. We come here if both inputs are
+  // positive, but our fast initial check didn't manage to rule out a NaN. But
+  // it's not guaranteed that there _is_ a NaN, for two reasons:
+  //
+  //  1. An input with exponent 0xFF might be an infinity instead. Those behave
+  //    normally under comparison.
+  //
+  //  2. There might not even _be_ an input with exponent 0xFF. All we know so
+  //     far is that the two inputs ORed together had all the exponent bits
+  //     set. So each of those bits is set in _at least one_ of the inputs, but
+  //     not necessarily all in the _same_ input.
+  //
+  // Test each exponent individually for 0xFF, using the same CMN idiom as
+  // above. If neither one carries into the sign bit then we have no NaNs _or_
+  // infinities and can compare the registers and return again.
+  cmn     op0, #1 << 23
+  cmnpl   op1, #1 << 23
+  bmi     LOCAL_LABEL(NaN_check_positive)
+
+  // Second-tier return path, now we've ruled out anything difficult.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaN_check_positive):
+  // Third tier for positive numbers. Here we know that at least one of the
+  // inputs has exponent 0xFF. But they might still be infinities rather than
+  // NaNs. So now we must check whether there's an actual NaN, by shifting each
+  // input left to get rid of the sign bit, and seeing if the result is
+  // _greater_ than 0xFF000000 (but not equal).
+  //
+  // We could have skipped the second-tier check and done this more rigorous
+  // test immediately. But that would cost an extra instruction in the case
+  // where there are no infinities or NaNs, and we assume that that is so much
+  // more common that it's worth optimizing for.
+  mov     r12, #0xFF << 24
+  cmp     r12, op0, LSL #1   // if LO, then r12 < (op0 << 1), so op0 is a NaN
+  cmphs   r12, op1, LSL #1   // if not LO, then do the same check for op1
+  blo     LOCAL_LABEL(NaN)           // now, if LO, there's definitely a NaN
+
+  // Now we've finally ruled out NaNs! And we still know both inputs are
+  // positive. So the third-tier return path can just compare the numbers
+  // again.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(negative):
+  // We come here if at least one operand is negative. We haven't checked for
+  // NaNs at all yet (the sign check came first), so repeat the first-tier
+  // check strategy of seeing if all exponent bits are set in r12.
+  //
+  // On this path, the sign bit in r12 is set, so if adding 1 to the low
+  // exponent bit carries all the way through into the sign bit, it will
+  // _clear_ the sign bit rather than setting it. So we expect MI to be the
+  // "definitely no NaNs" result, where it was PL on the positive branch.
+  cmn     r12, #1 << 23
+  bpl     LOCAL_LABEL(NaNInf_check_negative)
+
+  // Now we have no NaNs, but at least one negative number. This gives us two
+  // complications:
+  //
+  //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
+  //     have to consider separately the cases of "both negative" and "one of
+  //     each sign".
+  //
+  //  2. -0 and +0 are required to compare equal.
+  //
+  // But problem #1 is not as hard as it sounds! If both operands are negative,
+  // then we can get the result we want by comparing them as unsigned integers
+  // the opposite way round, because the input with the smaller value (as an
+  // integer) is the larger number in an FP ordering sense. And if one operand
+  // is negative and the other is positive, the _same_ reversed comparison
+  // works, because the positive number (with zero sign bit) will always
+  // compare less than the negative one in an unsigned-integers sense.
+  //
+  // So we only have to worry about problem #2, signed zeroes. This only
+  // affects the answer if _both_ operands are zero. And we can check that
+  // easily, because it happens if and only if r12 = 0x80000000. (We know r12
+  // has its sign bit set; if it has no other bits set, that's because both
+  // inputs were either 0x80000000 or 0x00000000.)
+  cmp     r12, #0x80000000        // EQ if both inputs are zero
+  cmpne   op1, op0                // otherwise, compare them backwards
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_negative):
+  // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
+  // but again, we might not have either _actual_ exponent 0xFF, and also, an
+  // exponent 0xFF might be an infinity instead of a NaN.
+  //
+  // On this path we've already branched twice (once for negative numbers and
+  // once for the first-tier NaN check), so we'll just go straight to the
+  // precise check for NaNs.
+  mov     r12, #0xFF << 24
+  cmp     r12, op0, LSL #1   // if LO, then r12 < (op0 << 1), so op0 is a NaN
+  cmphs   r12, op1, LSL #1   // if not LO, then do the same check for op1
+  blo     LOCAL_LABEL(NaN)
+
+  // Now we've ruled out NaNs, so we can just compare the two input registers
+  // and return. On this path we _don't_ need to check for the special case of
+  // comparing two zeroes, because we only came here if the bitwise OR of the
+  // exponent fields was 0xFF, which means the exponents can't both have been
+  // zero! So we can _just_ do the reversed CMP and finish.
+  cmp     op1, op0
+  SetReturnRegister
+  bx      lr
diff --git a/compiler-rt/lib/builtins/arm/gesf2.S b/compiler-rt/lib/builtins/arm/gesf2.S
new file mode 100644
index 0000000000000..c149eea589f05
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/gesf2.S
@@ -0,0 +1,54 @@
+//===-- gesf2.S - single-precision floating point comparison --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2, except for its NaN
+// handling. It's a three-way compare which returns <0 if x<y, 0 if x==y, and
+// >0 if x>y. If the result is unordered (i.e. x or y or both is NaN) then it
+// returns <0, where __cmpsf2 would return >0.
+//
+// This also makes it suitable for use as __gtsf2 or __gesf2 (or __eqsf2 or
+// __nesf2).
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  mov r0, #0
+  movhi r0, #1
+  movlo r0, #-1
+.endm
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__gesf2)
+  push {r4, lr}
+  vmov r0, s0
+  vmov r1, s1
+  bl __compiler_rt_softfp_gesf2
+  pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __compiler_rt_softfp_gesf2)
+#endif
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gtsf2, __gesf2)
+
+DEFINE_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  mov r0, #-1
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
new file mode 100644
index 0000000000000..c8611d1147366
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
@@ -0,0 +1,55 @@
+//===-- cmpsf2.S - single-precision floating point comparison -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2: it's a three-way compare
+// which returns <0 if x<y, 0 if x==y, and >0 if x>y. If the result is
+// unordered (i.e. x or y or both is NaN) then it returns >0.
+//
+// This also makes it suitable for use as all of __eqsf2, __nesf2, __ltsf2 or
+// __lesf2.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  bhi 0f
+  blo 1f
+  movs r0, #0
+  bx lr
+0:
+  movs r0, #1
+  bx lr
+1:
+  movs r0, #1
+  rsbs r0, r0, #0
+  bx lr
+.endm
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__cmpsf2, __compiler_rt_softfp_cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__lesf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__ltsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__eqsf2, __cmpsf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__nesf2, __cmpsf2)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__compiler_rt_softfp_cmpsf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  movs r0, #1
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_cmpsf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
new file mode 100644
index 0000000000000..bcfe928407e3c
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
@@ -0,0 +1,191 @@
+//===-- fcmp.h - shared code for single-precision FP comparison functions -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This code is the skeleton of a single-precision FP compare, with two details
+// left out: which input value is in which register, and how to make the return
+// value. It allows the main comparison logic to be shared between (for
+// example) __lesf2 and __gesf2, varying only those details.
+//
+//===----------------------------------------------------------------------===//
+
+// How to use this header file:
+//
+// This header file is expected to be #included from inside a function
+// definition in a .S file. The source file including this header should
+// provide the following:
+//
+// op0 and op1: register aliases (via .req) for the registers containing the
+// input operands.
+//  - For most comparisons, op0 will correspond to r0 and op1 to r1.
+//  - But a function with the reversed semantics of __aeabi_cfrcmple will define
+//    them the other way round.
+//
+// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
+// an appropriate return value in r0, for the cases that do *not* involve NaN.
+//  - On entry to this macro, the condition codes LO, EQ and HI indicate that
+//    op0 < op1, op0 == op1 or op0 > op1 respectively.
+//  - For functions that return a result in the flags, this macro can be empty,
+//    because those are the correct flags to return anyway.
+//  - Functions that return a boolean in r0 should set it up by checking the
+//    flags.
+//
+// LOCAL_LABEL(NaN): a label defined within the compare function, after the
+// #include of this header. Called when at least one input is a NaN, and sets
+// up the appropriate return value for that case.
+
+// --------------------------------------------------
+// The actual entry point of the compare function.
+//
+// The basic plan is to start by ORing together the two inputs. This tells us
+// two things:
+//  - the top bit of the output tells us whether both inputs are positive, or
+//    whether at least one is negative
+//  - if the 8 exponent bits of the output are not all 1, then there are
+//    definitely no NaNs, so a fast path can handle most non-NaN cases.
+
+  // Set up the constant 1 << 23 in a register, which we'll need on all
+  // branches.
+  movs    r3, #1
+  lsls    r3, r3, #23
+
+  // Diverge control for the negative-numbers case.
+  movs    r2, op0
+  orrs    r2, r2, op1
+  bmi     LOCAL_LABEL(negative)         // high bit set => at least one negative input
+
+  // Here, both inputs are positive. Try adding 1<<23 to their bitwise OR in
+  // r2. This will carry all the way into the top bit, setting the N flag, if
+  // all 8 exponent bits were set.
+  cmn     r2, r3
+  bmi     LOCAL_LABEL(NaNInf_check_positive) // need to look harder for NaNs
+
+  // The fastest fast path: both inputs positive and we could easily tell there
+  // were no NaNs. So we just compare op0 and op1 as unsigned integers.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_positive):
+  // Second tier for positive numbers. We come here if both inputs are
+  // positive, but our fast initial check didn't manage to rule out a NaN. But
+  // it's not guaranteed that there _is_ a NaN, for two reasons:
+  //
+  //  1. An input with exponent 0xFF might be an infinity instead. Those behave
+  //    normally under comparison.
+  //
+  //  2. There might not even _be_ an input with exponent 0xFF. All we know so
+  //     far is that the two inputs ORed together had all the exponent bits
+  //     set. So each of those bits is set in _at least one_ of the inputs, but
+  //     not necessarily all in the _same_ input.
+  //
+  // Test each exponent individually for 0xFF, using the same CMN idiom as
+  // above. If neither one carries into the sign bit then we have no NaNs _or_
+  // infinities and can compare the registers and return again.
+  cmn     op0, r3
+  bmi     LOCAL_LABEL(NaN_check_positive)
+  cmn     op1, r3
+  bmi     LOCAL_LABEL(NaN_check_positive)
+
+  // Second-tier return path, now we've ruled out anything difficult.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaN_check_positive):
+  // Third tier for positive numbers. Here we know that at least one of the
+  // inputs has exponent 0xFF. But they might still be infinities rather than
+  // NaNs. So now we must check whether there's an actual NaN, by shifting each
+  // input left to get rid of the sign bit, and seeing if the result is
+  // _greater_ than 0xFF000000 (but not equal).
+  //
+  // We could have skipped the second-tier check and done this more rigorous
+  // test immediately. But that would cost an extra instruction in the case
+  // where there are no infinities or NaNs, and we assume that that is so much
+  // more common that it's worth optimizing for.
+  movs    r2, #0xFF
+  lsls    r2, r2, #24
+  lsls    r3, op0, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+  lsls    r3, op1, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+
+  // Now we've finally ruled out NaNs! And we still know both inputs are
+  // positive. So the third-tier return path can just compare the numbers
+  // again.
+  cmp     op0, op1
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(negative):
+  // We come here if at least one operand is negative. We haven't checked for
+  // NaNs at all yet (the sign check came first), so repeat the first-tier
+  // check strategy of seeing if all exponent bits are set in r2.
+  //
+  // On this path, the sign bit in r2 is set, so if adding 1 to the low
+  // exponent bit carries all the way through into the sign bit, it will
+  // _clear_ the sign bit rather than setting it. So we expect MI to be the
+  // "definitely no NaNs" result, where it was PL on the positive branch.
+  cmn     r2, r3
+  bpl     LOCAL_LABEL(NaNInf_check_negative)
+
+  // Now we have no NaNs, but at least one negative number. This gives us two
+  // complications:
+  //
+  //  1. Floating-point numbers are sign/magnitude, not two's complement, so we
+  //     have to consider separately the cases of "both negative" and "one of
+  //     each sign".
+  //
+  //  2. -0 and +0 are required to compare equal.
+  //
+  // But problem #1 is not as hard as it sounds! If both operands are negative,
+  // then we can get the result we want by comparing them as unsigned integers
+  // the opposite way round, because the input with the smaller value (as an
+  // integer) is the larger number in an FP ordering sense. And if one operand
+  // is negative and the other is positive, the _same_ reversed comparison
+  // works, because the positive number (with zero sign bit) will always
+  // compare less than the negative one in an unsigned-integers sense.
+  //
+  // So we only have to worry about problem #2, signed zeroes. This only
+  // affects the answer if _both_ operands are zero. And we can check that
+  // easily, because it happens if and only if r2 = 0x80000000. (We know r2
+  // has its sign bit set; if it has no other bits set, that's because both
+  // inputs were either 0x80000000 or 0x00000000.)
+  lsls    r2, r2, #1              // EQ if both inputs are zero (also sets C)
+  beq     1f
+  cmp     op1, op0                // otherwise, compare them backwards
+1:
+  SetReturnRegister
+  bx      lr
+
+LOCAL_LABEL(NaNInf_check_negative):
+  // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
+  // but again, we might not have either _actual_ exponent 0xFF, and also, an
+  // exponent 0xFF might be an infinity instead of a NaN.
+  //
+  // On this path we've already branched twice (once for negative numbers and
+  // once for the first-tier NaN check), so we'll just go straight to the
+  // precise check for NaNs.
+  movs    r2, #0xFF
+  lsls    r2, r2, #24
+  lsls    r3, op0, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+  lsls    r3, op1, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+
+  // Now we've ruled out NaNs, so we can just compare the two input registers
+  // and return. On this path we _don't_ need to check for the special case of
+  // comparing two zeroes, because we only came here if the bitwise OR of the
+  // exponent fields was 0xFF, which means the exponents can't both have been
+  // zero! So we can _just_ do the reversed CMP and finish.
+  cmp     op1, op0
+  SetReturnRegister
+  bx      lr
diff --git a/compiler-rt/lib/builtins/arm/thumb1/gesf2.S b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
new file mode 100644
index 0000000000000..aa75ec7b0a67b
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
@@ -0,0 +1,54 @@
+//===-- gesf2.S - single-precision floating point comparison --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This function has the semantics of GNU __cmpsf2, except for its NaN
+// handling. It's a three-way compare which returns <0 if x<y, 0 if x==y, and
+// >0 if x>y. If the result is unordered (i.e. x or y or both is NaN) then it
+// returns <0, where __cmpsf2 would return >0.
+//
+// This also makes it suitable for use as __gtsf2 or __gesf2 (or __eqsf2 or
+// __nesf2).
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+op0 .req r0
+op1 .req r1
+.macro SetReturnRegister
+  bhi 0f
+  blo 1f
+  movs r0, #0
+  bx lr
+0:
+  movs r0, #1
+  bx lr
+1:
+  movs r0, #1
+  rsbs r0, r0, #0
+  bx lr
+.endm
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gesf2, __compiler_rt_softfp_gesf2)
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__gtsf2, __gesf2)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__compiler_rt_softfp_gesf2)
+  #include "fcmp.h"
+
+LOCAL_LABEL(NaN):
+  movs r0, #1
+  rsbs r0, r0, #0
+  bx lr
+
+END_COMPILERRT_FUNCTION(__compiler_rt_softfp_gesf2)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S b/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S
new file mode 100644
index 0000000000000..5d74e0fdfe159
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/thumb1/unordsf2.S
@@ -0,0 +1,49 @@
+//===-- unordsf2.S - single-precision floating point comparison -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Return 1 if the result of comparing x with y is 'unordered', i.e.
+// one of x and y is NaN.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../../assembly.h"
+
+  .syntax unified
+  .text
+  .p2align 2
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__unordsf2, __aeabi_fcmpun)
+
+DEFINE_COMPILERRT_THUMB_FUNCTION(__aeabi_fcmpun)
+
+  // This function isn't based on the general-purpose code in fcmp.h, because
+  // it's more effort than needed. Here we just need to identify whether or not
+  // there's at least one NaN in the inputs. There's no need to vary that check
+  // based on the sign bit, so we might as well just do the NaN test as quickly
+  // as possible.
+  movs    r2, #0xFF
+  lsls    r2, r2, #24
+  lsls    r3, r0, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+  lsls    r3, r1, #1
+  cmp     r3, r2
+  bhi     LOCAL_LABEL(NaN)
+
+  // If neither branch was taken, we have no NaNs and return false.
+  movs    r0, #0
+  bx      lr
+
+  // Otherwise, we have at least one NaN, and return true.
+LOCAL_LABEL(NaN):
+  movs    r0, #1
+  bx      lr
+
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/lib/builtins/arm/unordsf2.S b/compiler-rt/lib/builtins/arm/unordsf2.S
new file mode 100644
index 0000000000000..1930996779888
--- /dev/null
+++ b/compiler-rt/lib/builtins/arm/unordsf2.S
@@ -0,0 +1,56 @@
+//===-- unordsf2.S - single-precision floating point comparison -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Return 1 if the result of comparing x with y is 'unordered', i.e.
+// one of x and y is NaN.
+//
+//===----------------------------------------------------------------------===//
+
+#include "../assembly.h"
+
+
+  .syntax unified
+  .text
+  .p2align 2
+
+#if __ARM_PCS_VFP
+DEFINE_COMPILERRT_FUNCTION(__unordsf2)
+  push {r4, lr}
+  vmov r0, s0
+  vmov r1, s1
+  bl __aeabi_fcmpun
+  pop {r4, pc}
+#else
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__unordsf2, __aeabi_fcmpun)
+#endif
+
+DEFINE_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+
+  // This function isn't based on the general-purpose code in fcmp.h, because
+  // it's more effort than needed. Here we just need to identify whether or not
+  // there's at least one NaN in the inputs. There's no need to vary that check
+  // based on the sign bit, so we might as well just do the NaN test as quickly
+  // as possible.
+  mov     r12, #0xFF << 24
+  cmp     r12, r0, lsl #1    // if LO, then r12 < (r0 << 1), so r0 is a NaN
+  cmphs   r12, r1, lsl #1    // if not LO, then do the same check for r1
+
+  // If HS, then we have no NaNs and return false. We do this as quickly as we
+  // can (not stopping to take two instructions setting up r0 for both
+  // possibilities), on the assumption that NaNs are rare and we want to
+  // optimize for the non-NaN path.
+  movhs   r0, #0
+  bxhs    lr
+
+  // Otherwise, we have at least one NaN, and return true.
+  mov     r0, #1
+  bx      lr
+
+END_COMPILERRT_FUNCTION(__aeabi_fcmpun)
+
+NO_EXEC_STACK_DIRECTIVE
diff --git a/compiler-rt/test/builtins/Unit/comparesf2new_test.c b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
new file mode 100644
index 0000000000000..5c8be88354618
--- /dev/null
+++ b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
@@ -0,0 +1,433 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_comparesf2
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+COMPILER_RT_ABI int __eqsf2(float, float);
+COMPILER_RT_ABI int __nesf2(float, float);
+COMPILER_RT_ABI int __gesf2(float, float);
+COMPILER_RT_ABI int __gtsf2(float, float);
+COMPILER_RT_ABI int __lesf2(float, float);
+COMPILER_RT_ABI int __ltsf2(float, float);
+COMPILER_RT_ABI int __cmpsf2(float, float);
+COMPILER_RT_ABI int __unordsf2(float, float);
+
+enum Result {
+  RESULT_LT,
+  RESULT_GT,
+  RESULT_EQ,
+  RESULT_UN
+};
+
+int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name, int result, int ok, const char *expected) {
+  if (!ok)
+    printf("error at line %d: %s(%08" PRIx32 ", %08" PRIx32 ") = %d, expected %s\n",
+           line, name, a_rep, b_rep, result, expected);
+  return !ok;
+}
+
+int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep, enum Result result) {
+  float a = fromRep32(a_rep), b = fromRep32(b_rep);
+
+  int eq = __eqsf2(a, b);
+  int ne = __nesf2(a, b);
+  int ge = __gesf2(a, b);
+  int gt = __gtsf2(a, b);
+  int le = __lesf2(a, b);
+  int lt = __ltsf2(a, b);
+  int cmp = __cmpsf2(a, b);
+  int unord = __unordsf2(a, b);
+
+  int ret = 0;
+
+  switch (result) {
+  case RESULT_LT:
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt < 0, "< 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == -1, "== -1");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    break;
+  case RESULT_GT:
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt > 0, "> 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    break;
+  case RESULT_EQ:
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq == 0, "== 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne == 0, "== 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 0, "== 0");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    break;
+  case RESULT_UN:
+    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
+    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0");
+    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
+    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0");
+    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
+    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1");
+    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 1, "== 1");
+    break;
+  }
+
+  return ret;
+}
+
+#define test__comparesf2(a,b,x) test__comparesf2(__LINE__,a,b,x)
+
+int main(void) {
+  int status = 0;
+
+  status |= test__comparesf2(0x00000000, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x00000000, 0x7f872da0, RESULT_UN);
+  status |= test__comparesf2(0x00000000, 0x7fe42e09, RESULT_UN);
+  status |= test__comparesf2(0x00000000, 0x80000000, RESULT_EQ);
+  status |= test__comparesf2(0x00000000, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x00000000, 0x807fffff, RESULT_GT);
+  status |= test__comparesf2(0x00000000, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x00000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0x00000001, RESULT_EQ);
+  status |= test__comparesf2(0x00000001, 0x3f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x3ffffffe, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x3fffffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f7ffffe, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x00000001, 0x7f94d5b9, RESULT_UN);
+  status |= test__comparesf2(0x00000001, 0x7fef53b1, RESULT_UN);
+  status |= test__comparesf2(0x00000001, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbf7fffff, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbffffffe, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xbfffffff, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xff7ffffe, RESULT_GT);
+  status |= test__comparesf2(0x00000001, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x00000002, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x00000003, 0x00000002, RESULT_GT);
+  status |= test__comparesf2(0x00000003, 0x40400000, RESULT_LT);
+  status |= test__comparesf2(0x00000003, 0x40a00000, RESULT_LT);
+  status |= test__comparesf2(0x00000003, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x00000003, 0xc0a00000, RESULT_GT);
+  status |= test__comparesf2(0x00000003, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x00000004, 0x00000004, RESULT_EQ);
+  status |= test__comparesf2(0x007ffffc, 0x807ffffc, RESULT_GT);
+  status |= test__comparesf2(0x007ffffd, 0x007ffffe, RESULT_LT);
+  status |= test__comparesf2(0x007fffff, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x007fffff, 0x007ffffe, RESULT_GT);
+  status |= test__comparesf2(0x007fffff, 0x007fffff, RESULT_EQ);
+  status |= test__comparesf2(0x007fffff, 0x00800000, RESULT_LT);
+  status |= test__comparesf2(0x007fffff, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x007fffff, 0x7fa111d3, RESULT_UN);
+  status |= test__comparesf2(0x007fffff, 0x7ff43134, RESULT_UN);
+  status |= test__comparesf2(0x007fffff, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x007fffff, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x00800000, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x00800000, 0x00800000, RESULT_EQ);
+  status |= test__comparesf2(0x00800000, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x00800001, 0x00800000, RESULT_GT);
+  status |= test__comparesf2(0x00800001, 0x00800002, RESULT_LT);
+  status |= test__comparesf2(0x00ffffff, 0x01000000, RESULT_LT);
+  status |= test__comparesf2(0x00ffffff, 0x01000002, RESULT_LT);
+  status |= test__comparesf2(0x00ffffff, 0x01000004, RESULT_LT);
+  status |= test__comparesf2(0x01000000, 0x00ffffff, RESULT_GT);
+  status |= test__comparesf2(0x01000001, 0x00800001, RESULT_GT);
+  status |= test__comparesf2(0x01000001, 0x00ffffff, RESULT_GT);
+  status |= test__comparesf2(0x01000002, 0x00800001, RESULT_GT);
+  status |= test__comparesf2(0x017fffff, 0x01800000, RESULT_LT);
+  status |= test__comparesf2(0x01800000, 0x017fffff, RESULT_GT);
+  status |= test__comparesf2(0x01800001, 0x017fffff, RESULT_GT);
+  status |= test__comparesf2(0x01800002, 0x01000003, RESULT_GT);
+  status |= test__comparesf2(0x3f000000, 0x3f000000, RESULT_EQ);
+  status |= test__comparesf2(0x3f7fffff, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x3f7fffff, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x3f800000, 0x3f800000, RESULT_EQ);
+  status |= test__comparesf2(0x3f800000, 0x3f800003, RESULT_LT);
+  status |= test__comparesf2(0x3f800000, 0x40000000, RESULT_LT);
+  status |= test__comparesf2(0x3f800000, 0x40e00000, RESULT_LT);
+  status |= test__comparesf2(0x3f800000, 0x7fb27f62, RESULT_UN);
+  status |= test__comparesf2(0x3f800000, 0x7fd9d4b4, RESULT_UN);
+  status |= test__comparesf2(0x3f800000, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x3f800000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x3f800000, 0xbf800003, RESULT_GT);
+  status |= test__comparesf2(0x3f800001, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x3f800001, 0x3f800002, RESULT_LT);
+  status |= test__comparesf2(0x3f800001, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x3ffffffc, 0x3ffffffd, RESULT_LT);
+  status |= test__comparesf2(0x3fffffff, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x3fffffff, 0x40000000, RESULT_LT);
+  status |= test__comparesf2(0x40000000, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0x3fffffff, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0x40000000, RESULT_EQ);
+  status |= test__comparesf2(0x40000000, 0x40000001, RESULT_LT);
+  status |= test__comparesf2(0x40000000, 0xc0000000, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0xc0000001, RESULT_GT);
+  status |= test__comparesf2(0x40000000, 0xc0a00000, RESULT_GT);
+  status |= test__comparesf2(0x40000001, 0x3f800001, RESULT_GT);
+  status |= test__comparesf2(0x40000001, 0x40000002, RESULT_LT);
+  status |= test__comparesf2(0x40000001, 0xc0000002, RESULT_GT);
+  status |= test__comparesf2(0x40000002, 0x3f800001, RESULT_GT);
+  status |= test__comparesf2(0x40000002, 0x3f800003, RESULT_GT);
+  status |= test__comparesf2(0x40000004, 0x40000003, RESULT_GT);
+  status |= test__comparesf2(0x40400000, 0x40400000, RESULT_EQ);
+  status |= test__comparesf2(0x407fffff, 0x407ffffe, RESULT_GT);
+  status |= test__comparesf2(0x407fffff, 0x40800002, RESULT_LT);
+  status |= test__comparesf2(0x40800001, 0x407fffff, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x40a00000, 0xc0a00000, RESULT_GT);
+  status |= test__comparesf2(0x7d800001, 0x7d7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7e7fffff, 0x7e7ffffe, RESULT_GT);
+  status |= test__comparesf2(0x7e7fffff, 0x7e800002, RESULT_LT);
+  status |= test__comparesf2(0x7e800000, 0x7e7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7e800000, 0x7e800000, RESULT_EQ);
+  status |= test__comparesf2(0x7e800000, 0x7e800001, RESULT_LT);
+  status |= test__comparesf2(0x7e800001, 0x7e800000, RESULT_GT);
+  status |= test__comparesf2(0x7e800001, 0x7f000001, RESULT_LT);
+  status |= test__comparesf2(0x7e800001, 0xfe800000, RESULT_GT);
+  status |= test__comparesf2(0x7e800002, 0x7e000003, RESULT_GT);
+  status |= test__comparesf2(0x7e800004, 0x7e800003, RESULT_GT);
+  status |= test__comparesf2(0x7efffffe, 0x7efffffe, RESULT_EQ);
+  status |= test__comparesf2(0x7efffffe, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0x7efffffe, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0x7effffff, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7effffff, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x7effffff, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7effffff, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0x7f000000, RESULT_EQ);
+  status |= test__comparesf2(0x7f000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x7f000000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x7f000001, 0x7f000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000001, 0x7f000002, RESULT_LT);
+  status |= test__comparesf2(0x7f000001, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f000002, 0x7e800001, RESULT_GT);
+  status |= test__comparesf2(0x7f7ffffe, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7f7ffffe, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x7f7ffffe, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7f7ffffe, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0x3f800000, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0x7f7fffff, RESULT_EQ);
+  status |= test__comparesf2(0x7f7fffff, 0x7fbed1eb, RESULT_UN);
+  status |= test__comparesf2(0x7f7fffff, 0x7fe15ee3, RESULT_UN);
+  status |= test__comparesf2(0x7f7fffff, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x7f7fffff, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x00000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x00000001, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x007fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x7f000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x7f7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x7f800000, RESULT_EQ);
+  status |= test__comparesf2(0x7f800000, 0x7f91a4da, RESULT_UN);
+  status |= test__comparesf2(0x7f800000, 0x7fd44a09, RESULT_UN);
+  status |= test__comparesf2(0x7f800000, 0x80000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0x807fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x7f800000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x7f86d066, 0x00000000, RESULT_UN);
+  status |= test__comparesf2(0x7f85a878, 0x00000001, RESULT_UN);
+  status |= test__comparesf2(0x7f8c0dca, 0x007fffff, RESULT_UN);
+  status |= test__comparesf2(0x7f822725, 0x3f800000, RESULT_UN);
+  status |= test__comparesf2(0x7f853870, 0x7f7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fbefc9d, 0x7f800000, RESULT_UN);
+  status |= test__comparesf2(0x7f9f84a9, 0x7f81461b, RESULT_UN);
+  status |= test__comparesf2(0x7f9e2c1d, 0x7fe4a313, RESULT_UN);
+  status |= test__comparesf2(0x7fb0e6d0, 0x80000000, RESULT_UN);
+  status |= test__comparesf2(0x7fac9171, 0x80000001, RESULT_UN);
+  status |= test__comparesf2(0x7f824ae6, 0x807fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fa8b9a0, 0xbf800000, RESULT_UN);
+  status |= test__comparesf2(0x7f92a1cd, 0xff7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fbe5d29, 0xff800000, RESULT_UN);
+  status |= test__comparesf2(0x7fcc9a57, 0x00000000, RESULT_UN);
+  status |= test__comparesf2(0x7fec9d71, 0x00000001, RESULT_UN);
+  status |= test__comparesf2(0x7fd5db76, 0x007fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fd003d9, 0x3f800000, RESULT_UN);
+  status |= test__comparesf2(0x7fca0684, 0x7f7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fc46aa0, 0x7f800000, RESULT_UN);
+  status |= test__comparesf2(0x7ff72b19, 0x7faee637, RESULT_UN);
+  status |= test__comparesf2(0x7fe9e0c1, 0x7fcc2788, RESULT_UN);
+  status |= test__comparesf2(0x7fc571ea, 0x80000000, RESULT_UN);
+  status |= test__comparesf2(0x7fd81a54, 0x80000001, RESULT_UN);
+  status |= test__comparesf2(0x7febdfaf, 0x807fffff, RESULT_UN);
+  status |= test__comparesf2(0x7ffa1f94, 0xbf800000, RESULT_UN);
+  status |= test__comparesf2(0x7ff38fa0, 0xff7fffff, RESULT_UN);
+  status |= test__comparesf2(0x7fdf3502, 0xff800000, RESULT_UN);
+  status |= test__comparesf2(0x80000000, 0x00000000, RESULT_EQ);
+  status |= test__comparesf2(0x80000000, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x80000000, 0x7fbdfb72, RESULT_UN);
+  status |= test__comparesf2(0x80000000, 0x7fdd528e, RESULT_UN);
+  status |= test__comparesf2(0x80000000, 0x80000001, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0x807fffff, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x80000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3ffffffe, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x3fffffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7f7ffffe, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0x80000001, 0x7fac481a, RESULT_UN);
+  status |= test__comparesf2(0x80000001, 0x7fcf111d, RESULT_UN);
+  status |= test__comparesf2(0x80000001, 0x80000001, RESULT_EQ);
+  status |= test__comparesf2(0x80000001, 0xbf7fffff, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xbf800000, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xbffffffe, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xbfffffff, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xff7ffffe, RESULT_GT);
+  status |= test__comparesf2(0x80000001, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0x80000002, 0x80000001, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0x40400000, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0x80000002, RESULT_LT);
+  status |= test__comparesf2(0x80000003, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0x80000004, 0x80000004, RESULT_EQ);
+  status |= test__comparesf2(0x807ffffd, 0x807ffffe, RESULT_GT);
+  status |= test__comparesf2(0x807fffff, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x7faf07f6, RESULT_UN);
+  status |= test__comparesf2(0x807fffff, 0x7fd18a54, RESULT_UN);
+  status |= test__comparesf2(0x807fffff, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x807ffffe, RESULT_LT);
+  status |= test__comparesf2(0x807fffff, 0x807fffff, RESULT_EQ);
+  status |= test__comparesf2(0x807fffff, 0x80800000, RESULT_GT);
+  status |= test__comparesf2(0x807fffff, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0x80800000, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0x80800000, 0x00800000, RESULT_LT);
+  status |= test__comparesf2(0x80800001, 0x80800000, RESULT_LT);
+  status |= test__comparesf2(0x80800001, 0x80800002, RESULT_GT);
+  status |= test__comparesf2(0x80ffffff, 0x81000000, RESULT_GT);
+  status |= test__comparesf2(0x80ffffff, 0x81000002, RESULT_GT);
+  status |= test__comparesf2(0x80ffffff, 0x81000004, RESULT_GT);
+  status |= test__comparesf2(0x81000000, 0x80ffffff, RESULT_LT);
+  status |= test__comparesf2(0x81000001, 0x80800001, RESULT_LT);
+  status |= test__comparesf2(0x81000001, 0x80ffffff, RESULT_LT);
+  status |= test__comparesf2(0x81000002, 0x80800001, RESULT_LT);
+  status |= test__comparesf2(0x817fffff, 0x81800000, RESULT_GT);
+  status |= test__comparesf2(0x81800000, 0x817fffff, RESULT_LT);
+  status |= test__comparesf2(0x81800001, 0x817fffff, RESULT_LT);
+  status |= test__comparesf2(0x81800002, 0x81000003, RESULT_LT);
+  status |= test__comparesf2(0xbf800000, 0x3f800003, RESULT_LT);
+  status |= test__comparesf2(0xbf800000, 0x7fa66ee9, RESULT_UN);
+  status |= test__comparesf2(0xbf800000, 0x7fe481ef, RESULT_UN);
+  status |= test__comparesf2(0xbf800000, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0xbf800000, 0xbf800003, RESULT_GT);
+  status |= test__comparesf2(0xbf800001, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xbf800001, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xbf800001, 0xbf800002, RESULT_GT);
+  status |= test__comparesf2(0xbffffffc, 0xbffffffd, RESULT_GT);
+  status |= test__comparesf2(0xbfffffff, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0xbfffffff, 0xc0000000, RESULT_GT);
+  status |= test__comparesf2(0xc0000000, 0x40000001, RESULT_LT);
+  status |= test__comparesf2(0xc0000000, 0xbfffffff, RESULT_LT);
+  status |= test__comparesf2(0xc0000000, 0xc0000001, RESULT_GT);
+  status |= test__comparesf2(0xc0000001, 0x40000002, RESULT_LT);
+  status |= test__comparesf2(0xc0000001, 0xbf800001, RESULT_LT);
+  status |= test__comparesf2(0xc0000001, 0xc0000002, RESULT_GT);
+  status |= test__comparesf2(0xc0000002, 0xbf800001, RESULT_LT);
+  status |= test__comparesf2(0xc0000002, 0xbf800003, RESULT_LT);
+  status |= test__comparesf2(0xc0000004, 0xc0000003, RESULT_LT);
+  status |= test__comparesf2(0xc0400000, 0x40400000, RESULT_LT);
+  status |= test__comparesf2(0xc07fffff, 0xc07ffffe, RESULT_LT);
+  status |= test__comparesf2(0xc07fffff, 0xc0800002, RESULT_GT);
+  status |= test__comparesf2(0xc0800001, 0xc07fffff, RESULT_LT);
+  status |= test__comparesf2(0xfd800001, 0xfd7fffff, RESULT_LT);
+  status |= test__comparesf2(0xfe7fffff, 0xfe7ffffe, RESULT_LT);
+  status |= test__comparesf2(0xfe7fffff, 0xfe800002, RESULT_GT);
+  status |= test__comparesf2(0xfe800000, 0xfe7fffff, RESULT_LT);
+  status |= test__comparesf2(0xfe800000, 0xfe800001, RESULT_GT);
+  status |= test__comparesf2(0xfe800001, 0x7e800000, RESULT_LT);
+  status |= test__comparesf2(0xfe800001, 0xfe800000, RESULT_LT);
+  status |= test__comparesf2(0xfe800001, 0xff000001, RESULT_GT);
+  status |= test__comparesf2(0xfe800002, 0xfe000003, RESULT_LT);
+  status |= test__comparesf2(0xfe800004, 0xfe800003, RESULT_LT);
+  status |= test__comparesf2(0xfefffffe, 0x7efffffe, RESULT_LT);
+  status |= test__comparesf2(0xfefffffe, 0x7effffff, RESULT_LT);
+  status |= test__comparesf2(0xfefffffe, 0xfefffffe, RESULT_EQ);
+  status |= test__comparesf2(0xfefffffe, 0xfeffffff, RESULT_GT);
+  status |= test__comparesf2(0xfeffffff, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xfeffffff, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0xfeffffff, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xfeffffff, 0xff000000, RESULT_GT);
+  status |= test__comparesf2(0xff000000, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xff000000, 0xff000000, RESULT_EQ);
+  status |= test__comparesf2(0xff000000, 0xff800000, RESULT_GT);
+  status |= test__comparesf2(0xff000001, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0xff000001, 0xff000000, RESULT_LT);
+  status |= test__comparesf2(0xff000001, 0xff000002, RESULT_GT);
+  status |= test__comparesf2(0xff000002, 0xfe800001, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xff7ffffe, 0xff7fffff, RESULT_GT);
+  status |= test__comparesf2(0xff7fffff, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0x3f800000, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0x7f919cff, RESULT_UN);
+  status |= test__comparesf2(0xff7fffff, 0x7fd729a7, RESULT_UN);
+  status |= test__comparesf2(0xff7fffff, 0x80000001, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0xbf800000, RESULT_LT);
+  status |= test__comparesf2(0xff7fffff, 0xff7fffff, RESULT_EQ);
+  status |= test__comparesf2(0xff800000, 0x00000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x00000001, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x007fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7f000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7f7fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7f800000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x7fafdbc1, RESULT_UN);
+  status |= test__comparesf2(0xff800000, 0x7fec80fe, RESULT_UN);
+  status |= test__comparesf2(0xff800000, 0x80000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x80000001, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0x807fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0xff000000, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0xff7fffff, RESULT_LT);
+  status |= test__comparesf2(0xff800000, 0xff800000, RESULT_EQ);
+
+  return status;
+}

>From beea5fc71745c540bffbbe3141c93ca7aa4a772a Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Thu, 5 Feb 2026 17:23:56 +0000
Subject: [PATCH 2/7] clang-format

---
 compiler-rt/lib/builtins/arm/fcmp.h            |  2 ++
 compiler-rt/lib/builtins/arm/thumb1/fcmp.h     |  2 ++
 .../test/builtins/Unit/comparesf2new_test.c    | 18 ++++++++----------
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/compiler-rt/lib/builtins/arm/fcmp.h b/compiler-rt/lib/builtins/arm/fcmp.h
index 23bdd73a10c5b..4860479f45158 100644
--- a/compiler-rt/lib/builtins/arm/fcmp.h
+++ b/compiler-rt/lib/builtins/arm/fcmp.h
@@ -48,6 +48,8 @@
 //  - if the 8 exponent bits of the output are not all 1, then there are
 //    definitely no NaNs, so a fast path can handle most non-NaN cases.
 
+// clang-format off
+
   // First diverge control for the negative-numbers case.
   orrs    r12, op0, op1
   bmi     LOCAL_LABEL(negative)         // high bit set => at least one negative input
diff --git a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
index bcfe928407e3c..0f0f46b158aad 100644
--- a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
+++ b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
@@ -48,6 +48,8 @@
 //  - if the 8 exponent bits of the output are not all 1, then there are
 //    definitely no NaNs, so a fast path can handle most non-NaN cases.
 
+// clang-format off
+
   // Set up the constant 1 << 23 in a register, which we'll need on all
   // branches.
   movs    r3, #1
diff --git a/compiler-rt/test/builtins/Unit/comparesf2new_test.c b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
index 5c8be88354618..02fac8ba2a3bd 100644
--- a/compiler-rt/test/builtins/Unit/comparesf2new_test.c
+++ b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
@@ -20,21 +20,19 @@ COMPILER_RT_ABI int __ltsf2(float, float);
 COMPILER_RT_ABI int __cmpsf2(float, float);
 COMPILER_RT_ABI int __unordsf2(float, float);
 
-enum Result {
-  RESULT_LT,
-  RESULT_GT,
-  RESULT_EQ,
-  RESULT_UN
-};
+enum Result { RESULT_LT, RESULT_GT, RESULT_EQ, RESULT_UN };
 
-int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name, int result, int ok, const char *expected) {
+int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name,
+           int result, int ok, const char *expected) {
   if (!ok)
-    printf("error at line %d: %s(%08" PRIx32 ", %08" PRIx32 ") = %d, expected %s\n",
+    printf("error at line %d: %s(%08" PRIx32 ", %08" PRIx32
+           ") = %d, expected %s\n",
            line, name, a_rep, b_rep, result, expected);
   return !ok;
 }
 
-int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep, enum Result result) {
+int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep,
+                     enum Result result) {
   float a = fromRep32(a_rep), b = fromRep32(b_rep);
 
   int eq = __eqsf2(a, b);
@@ -94,7 +92,7 @@ int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep, enum Result resul
   return ret;
 }
 
-#define test__comparesf2(a,b,x) test__comparesf2(__LINE__,a,b,x)
+#define test__comparesf2(a, b, x) test__comparesf2(__LINE__, a, b, x)
 
 int main(void) {
   int status = 0;

>From c17cb0053a15b211c243fec803702990bb8ddcc6 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Thu, 5 Feb 2026 17:10:26 +0000
Subject: [PATCH 3/7] Update to use set_special_properties

---
 compiler-rt/lib/builtins/CMakeLists.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index ab741b0600973..a245cbd7e39b9 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -534,9 +534,10 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP)
   )
   set_special_properties(arm/thumb1/cmpdf2.S
     SUPERSEDES comparedf2.c PROVIDES comparedf2)
-  set_property(SOURCE arm/thumb1/cmpsf2.S PROPERTY crt_supersedes comparesf2.S)
-  # We don't need to set 'crt_provides' for cmpsf2.S, because the
-  # superseded comparesf2.S will already have enabled the comparesf2 tests.
+  set_special_properties(arm/thumb1/cmpsf2.S
+    SUPERSEDES comparesf2.S)
+  # We don't need to set PROVIDES for cmpsf2.S, because the superseded
+  # comparesf2.S will already have enabled the comparesf2 tests.
 endif()
 
 set(arm_EABI_RT_SOURCES

>From df5ed236cc931910e18735f4d0902eabcb22ec31 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Tue, 24 Feb 2026 16:40:32 +0000
Subject: [PATCH 4/7] Update tests to match #179918

---
 .../test/builtins/Unit/comparesf2new_test.c   | 74 +++++++++----------
 1 file changed, 37 insertions(+), 37 deletions(-)

diff --git a/compiler-rt/test/builtins/Unit/comparesf2new_test.c b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
index 02fac8ba2a3bd..5bed764b0957a 100644
--- a/compiler-rt/test/builtins/Unit/comparesf2new_test.c
+++ b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
@@ -22,8 +22,8 @@ COMPILER_RT_ABI int __unordsf2(float, float);
 
 enum Result { RESULT_LT, RESULT_GT, RESULT_EQ, RESULT_UN };
 
-int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name,
-           int result, int ok, const char *expected) {
+int expect(uint32_t a_rep, uint32_t b_rep, const char *name, int result, int ok,
+           const char *expected, int line) {
   if (!ok)
     printf("error at line %d: %s(%08" PRIx32 ", %08" PRIx32
            ") = %d, expected %s\n",
@@ -31,8 +31,8 @@ int expect(int line, uint32_t a_rep, uint32_t b_rep, const char *name,
   return !ok;
 }
 
-int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep,
-                     enum Result result) {
+int test__comparesf2(uint32_t a_rep, uint32_t b_rep, enum Result result,
+                     int line) {
   float a = fromRep32(a_rep), b = fromRep32(b_rep);
 
   int eq = __eqsf2(a, b);
@@ -48,51 +48,51 @@ int test__comparesf2(int line, uint32_t a_rep, uint32_t b_rep,
 
   switch (result) {
   case RESULT_LT:
-    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
-    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
-    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0");
-    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
-    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0");
-    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt < 0, "< 0");
-    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == -1, "== -1");
-    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    ret |= expect(a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0", line);
+    ret |= expect(a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0", line);
+    ret |= expect(a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0", line);
+    ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0", line);
+    ret |= expect(a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0", line);
+    ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt < 0, "< 0", line);
+    ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == -1, "== -1", line);
+    ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0", line);
     break;
   case RESULT_GT:
-    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
-    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
-    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0");
-    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt > 0, "> 0");
-    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0");
-    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
-    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1");
-    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    ret |= expect(a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0", line);
+    ret |= expect(a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0", line);
+    ret |= expect(a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0", line);
+    ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt > 0, "> 0", line);
+    ret |= expect(a_rep, b_rep, "__lesf2", le, le > 0, "> 0", line);
+    ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0", line);
+    ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1", line);
+    ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0", line);
     break;
   case RESULT_EQ:
-    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq == 0, "== 0");
-    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne == 0, "== 0");
-    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0");
-    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
-    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0");
-    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
-    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 0, "== 0");
-    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0");
+    ret |= expect(a_rep, b_rep, "__eqsf2", eq, eq == 0, "== 0", line);
+    ret |= expect(a_rep, b_rep, "__nesf2", ne, ne == 0, "== 0", line);
+    ret |= expect(a_rep, b_rep, "__gesf2", ge, ge >= 0, ">= 0", line);
+    ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0", line);
+    ret |= expect(a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0", line);
+    ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0", line);
+    ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == 0, "== 0", line);
+    ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0", line);
     break;
   case RESULT_UN:
-    ret |= expect(line, a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0");
-    ret |= expect(line, a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0");
-    ret |= expect(line, a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0");
-    ret |= expect(line, a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0");
-    ret |= expect(line, a_rep, b_rep, "__lesf2", le, le > 0, "> 0");
-    ret |= expect(line, a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0");
-    ret |= expect(line, a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1");
-    ret |= expect(line, a_rep, b_rep, "__unordsf2", unord, unord == 1, "== 1");
+    ret |= expect(a_rep, b_rep, "__eqsf2", eq, eq != 0, "!= 0", line);
+    ret |= expect(a_rep, b_rep, "__nesf2", ne, ne != 0, "!= 0", line);
+    ret |= expect(a_rep, b_rep, "__gesf2", ge, ge < 0, "< 0", line);
+    ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0", line);
+    ret |= expect(a_rep, b_rep, "__lesf2", le, le > 0, "> 0", line);
+    ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0", line);
+    ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1", line);
+    ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 1, "== 1", line);
     break;
   }
 
   return ret;
 }
 
-#define test__comparesf2(a, b, x) test__comparesf2(__LINE__, a, b, x)
+#define test__comparesf2(a, b, x) test__comparesf2(a, b, x, __LINE__)
 
 int main(void) {
   int status = 0;

>From 8bbb24e73bfd33c3e20ed5c98eefe2c4eb390ea2 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Wed, 25 Feb 2026 14:44:57 +0000
Subject: [PATCH 5/7] Rename Thumb1 SetReturnRegister to ReturnResult

Also remove the return instructions following it in the main macro,
which aren't needed, since it does the returning itself.
---
 compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S |  2 +-
 compiler-rt/lib/builtins/arm/thumb1/fcmp.h   | 24 ++++++++------------
 compiler-rt/lib/builtins/arm/thumb1/gesf2.S  |  2 +-
 3 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
index c8611d1147366..e4a5e08c35181 100644
--- a/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
+++ b/compiler-rt/lib/builtins/arm/thumb1/cmpsf2.S
@@ -23,7 +23,7 @@
 
 op0 .req r0
 op1 .req r1
-.macro SetReturnRegister
+.macro ReturnResult
   bhi 0f
   blo 1f
   movs r0, #0
diff --git a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
index 0f0f46b158aad..7d85abae05129 100644
--- a/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
+++ b/compiler-rt/lib/builtins/arm/thumb1/fcmp.h
@@ -25,12 +25,13 @@
 //  - But a function with the reversed semantics of __aeabi_cfrcmple wil define
 //    them the other way round.
 //
-// SetReturnRegister: an assembly macro that looks at the PSR flags and sets up
-// an appropriate return value in r0, for the cases that do *not* involve NaN.
+// ReturnResult: an assembly macro that looks at the PSR flags, sets up an
+// appropriate return value in r0, and returns it, for the cases that do *not*
+// involve NaN.
 //  - On entry to this macro, the condition codes LO, EQ and HI indicate that
 //    op0 < op1, op0 == op1 or op0 > op1 respectively.
-//  - For functions that return a result in the flags, this macro can be empty,
-//    because those are the correct flags to return anyway.
+//  - For functions that return a result in the flags, this macro can just
+//    return immediately, because those are the correct flags to return anyway.
 //  - Functions that return a boolean in r0 should set it up by checking the
 //    flags.
 //
@@ -69,8 +70,7 @@
   // The fastest fast path: both inputs positive and we could easily tell there
   // were no NaNs. So we just compare op0 and op1 as unsigned integers.
   cmp     op0, op1
-  SetReturnRegister
-  bx      lr
+  ReturnResult
 
 LOCAL_LABEL(NaNInf_check_positive):
   // Second tier for positive numbers. We come here if both inputs are
@@ -95,8 +95,7 @@ LOCAL_LABEL(NaNInf_check_positive):
 
   // Second-tier return path, now we've ruled out anything difficult.
   cmp     op0, op1
-  SetReturnRegister
-  bx      lr
+  ReturnResult
 
 LOCAL_LABEL(NaN_check_positive):
   // Third tier for positive numbers. Here we know that at least one of the
@@ -122,8 +121,7 @@ LOCAL_LABEL(NaN_check_positive):
   // positive. So the third-tier return path can just compare the numbers
   // again.
   cmp     op0, op1
-  SetReturnRegister
-  bx      lr
+  ReturnResult
 
 LOCAL_LABEL(negative):
   // We come here if at least one operand is negative. We haven't checked for
@@ -163,8 +161,7 @@ LOCAL_LABEL(negative):
   beq     1f
   cmp     op1, op0                // otherwise, compare them backwards
 1:
-  SetReturnRegister
-  bx      lr
+  ReturnResult
 
 LOCAL_LABEL(NaNInf_check_negative):
   // Second tier for negative numbers: we know the OR of the exponents is 0xFF,
@@ -189,5 +186,4 @@ LOCAL_LABEL(NaNInf_check_negative):
   // exponent fields was 0xFF, which means the exponents can't both have been
   // zero! So we can _just_ do the reversed CMP and finish.
   cmp     op1, op0
-  SetReturnRegister
-  bx      lr
+  ReturnResult
diff --git a/compiler-rt/lib/builtins/arm/thumb1/gesf2.S b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
index aa75ec7b0a67b..3830b6cb21c29 100644
--- a/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
+++ b/compiler-rt/lib/builtins/arm/thumb1/gesf2.S
@@ -24,7 +24,7 @@
 
 op0 .req r0
 op1 .req r1
-.macro SetReturnRegister
+.macro ReturnResult
   bhi 0f
   blo 1f
   movs r0, #0

>From be05b10f52575736f3964b2b22c0a718fe8a9374 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Thu, 26 Feb 2026 13:07:59 +0000
Subject: [PATCH 6/7] Fix CI failure on Windows

The new test was failing on Windows, because it tries to call
`__cmpsf2`, which the generic builtins/comparesf2.c only defines
conditionally on `__ELF__`. Do the same in the test.
---
 compiler-rt/test/builtins/Unit/comparesf2new_test.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/compiler-rt/test/builtins/Unit/comparesf2new_test.c b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
index 5bed764b0957a..b5dfe2352958f 100644
--- a/compiler-rt/test/builtins/Unit/comparesf2new_test.c
+++ b/compiler-rt/test/builtins/Unit/comparesf2new_test.c
@@ -41,7 +41,11 @@ int test__comparesf2(uint32_t a_rep, uint32_t b_rep, enum Result result,
   int gt = __gtsf2(a, b);
   int le = __lesf2(a, b);
   int lt = __ltsf2(a, b);
+#ifdef __ELF__
+  // The generic builtins/comparesf2.c does not define this function
+  // for object formats other than ELF
   int cmp = __cmpsf2(a, b);
+#endif
   int unord = __unordsf2(a, b);
 
   int ret = 0;
@@ -54,7 +58,9 @@ int test__comparesf2(uint32_t a_rep, uint32_t b_rep, enum Result result,
     ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0", line);
     ret |= expect(a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0", line);
     ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt < 0, "< 0", line);
+#ifdef __ELF__
     ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == -1, "== -1", line);
+#endif
     ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0", line);
     break;
   case RESULT_GT:
@@ -64,7 +70,9 @@ int test__comparesf2(uint32_t a_rep, uint32_t b_rep, enum Result result,
     ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt > 0, "> 0", line);
     ret |= expect(a_rep, b_rep, "__lesf2", le, le > 0, "> 0", line);
     ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0", line);
+#ifdef __ELF__
     ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1", line);
+#endif
     ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0", line);
     break;
   case RESULT_EQ:
@@ -74,7 +82,9 @@ int test__comparesf2(uint32_t a_rep, uint32_t b_rep, enum Result result,
     ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0", line);
     ret |= expect(a_rep, b_rep, "__lesf2", le, le <= 0, "<= 0", line);
     ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0", line);
+#ifdef __ELF__
     ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == 0, "== 0", line);
+#endif
     ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 0, "== 0", line);
     break;
   case RESULT_UN:
@@ -84,7 +94,9 @@ int test__comparesf2(uint32_t a_rep, uint32_t b_rep, enum Result result,
     ret |= expect(a_rep, b_rep, "__gtsf2", gt, gt <= 0, "<= 0", line);
     ret |= expect(a_rep, b_rep, "__lesf2", le, le > 0, "> 0", line);
     ret |= expect(a_rep, b_rep, "__ltsf2", lt, lt >= 0, ">= 0", line);
+#ifdef __ELF__
     ret |= expect(a_rep, b_rep, "__cmpsf2", cmp, cmp == 1, "== 1", line);
+#endif
     ret |= expect(a_rep, b_rep, "__unordsf2", unord, unord == 1, "== 1", line);
     break;
   }

>From fe187f654bceb5dfe7a9ce55d336a101f8fcdd18 Mon Sep 17 00:00:00 2001
From: Simon Tatham <simon.tatham at arm.com>
Date: Tue, 17 Mar 2026 10:43:05 +0000
Subject: [PATCH 7/7] Stop trying to crt_supersede one Arm .S file with another

Turns out that doesn't work: both versions of the assembly language
comparison were included in the output library, and the linker would
make an arbitrary choice of which to pull in to the link. Instead,
just put the old files on to the SOURCES list in an else clause.
---
 compiler-rt/lib/builtins/CMakeLists.txt | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 0479120fef2cb..05474e4eb2dc7 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -520,7 +520,6 @@ set(arm_sync_SOURCES
 set(thumb1_base_SOURCES
   arm/divsi3.S
   arm/udivsi3.S
-  arm/comparesf2.S
   arm/addsf3.S
   ${GENERIC_SOURCES}
 )
@@ -543,9 +542,14 @@ if(COMPILER_RT_ARM_OPTIMIZED_FP)
   set_special_properties(arm/thumb1/cmpdf2.S
     SUPERSEDES comparedf2.c PROVIDES comparedf2)
   set_special_properties(arm/thumb1/cmpsf2.S
-    SUPERSEDES comparesf2.S)
-  # We don't need to set PROVIDES for cmpsf2.S, because the superseded
-  # comparesf2.S will already have enabled the comparesf2 tests.
+    SUPERSEDES comparesf2.c PROVIDES comparesf2)
+else()
+  # Other Thumb1 assembly implementations which do not fall under the
+  # COMPILER_RT_ARM_OPTIMIZED_FP umbrella
+  set(thumb1_base_SOURCES
+    arm/comparesf2.S
+    ${thumb1_base_SOURCES}
+  )
 endif()
 
 set(arm_EABI_RT_SOURCES



More information about the llvm-branch-commits mailing list