[compiler-rt] [compiler-rt][ARM] Optimized f32 add/subtract for Armv6-M. (PR #154093)

Simon Tatham via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 18 08:13:22 PDT 2025


================
@@ -0,0 +1,352 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// RUN: %clang_builtins %s %librt -o %t && %run %t
+// REQUIRES: librt_has_addsf3
+
+#include "int_lib.h"
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "fp_test.h"
+
+// Returns: a + b
+COMPILER_RT_ABI float __addsf3(float a, float b);
+
+int test__addsf3(uint32_t a_rep, uint32_t b_rep, uint32_t expected_rep) { // Runs one __addsf3 case; operands and expected result are given as raw 32-bit patterns.
+  float a = fromRep32(a_rep), b = fromRep32(b_rep); // Build the float operands from their bit patterns (fromRep32 is presumably the fp_test.h bit-cast helper).
+  float x = __addsf3(a, b); // Call the routine under test directly rather than through the `+` operator.
+  int ret = compareResultF(x, expected_rep); // Nonzero is treated as a mismatch by the check below.
+
+  if (ret) { // On mismatch, print both inputs, the actual result and the expected result as 8-digit hex.
+    printf("error in test__addsf3(%08" PRIx32 ", %08" PRIx32 ") = %08" PRIx32
+           ", expected %08" PRIx32 "\n",
+           a_rep, b_rep, toRep32(x), expected_rep);
+  }
+  return ret; // Hand the comparison result back unchanged so the caller can accumulate it.
+}
+
+int main() {
+  int status = 0;
+
+  status |= test__addsf3(0x00000000, 0x00000000, 0x00000000);
+  status |= test__addsf3(0x00000000, 0x007fffff, 0x007fffff);
+  status |= test__addsf3(0x00000000, 0x3f800000, 0x3f800000);
+  status |= test__addsf3(0x00000000, 0x7f000000, 0x7f000000);
+  status |= test__addsf3(0x00000000, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0x00000000, 0x80000000, 0x00000000);
+  status |= test__addsf3(0x00000000, 0x807fffff, 0x807fffff);
+  status |= test__addsf3(0x00000000, 0x80800000, 0x80800000);
+  status |= test__addsf3(0x00000000, 0xff800000, 0xff800000);
+  status |= test__addsf3(0x00000001, 0x00000001, 0x00000002);
+  status |= test__addsf3(0x00000001, 0x3f7fffff, 0x3f7fffff);
+  status |= test__addsf3(0x00000001, 0x3f800000, 0x3f800000);
+  status |= test__addsf3(0x00000001, 0x3ffffffe, 0x3ffffffe);
+  status |= test__addsf3(0x00000001, 0x3fffffff, 0x3fffffff);
+  status |= test__addsf3(0x00000001, 0x7effffff, 0x7effffff);
+  status |= test__addsf3(0x00000001, 0x7f000000, 0x7f000000);
+  status |= test__addsf3(0x00000001, 0x7f7ffffe, 0x7f7ffffe);
+  status |= test__addsf3(0x00000001, 0x7f7fffff, 0x7f7fffff);
+  status |= test__addsf3(0x00000001, 0x80000001, 0x00000000);
+  status |= test__addsf3(0x00000002, 0x80000001, 0x00000001);
+  status |= test__addsf3(0x00000003, 0x00000000, 0x00000003);
+  status |= test__addsf3(0x00000003, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0x00000003, 0x80000000, 0x00000003);
+  status |= test__addsf3(0x00000003, 0x80000002, 0x00000001);
+  status |= test__addsf3(0x00000003, 0xc0a00000, 0xc0a00000);
+  status |= test__addsf3(0x00000003, 0xff000000, 0xff000000);
+  status |= test__addsf3(0x00000003, 0xff800000, 0xff800000);
+  status |= test__addsf3(0x00000004, 0x00000004, 0x00000008);
+  status |= test__addsf3(0x007ffffc, 0x807ffffc, 0x00000000);
+  status |= test__addsf3(0x007ffffd, 0x807ffffe, 0x80000001);
+  status |= test__addsf3(0x007fffff, 0x007fffff, 0x00fffffe);
+  status |= test__addsf3(0x007fffff, 0x807ffffe, 0x00000001);
+  status |= test__addsf3(0x007fffff, 0x80800000, 0x80000001);
+  status |= test__addsf3(0x00800000, 0x00000000, 0x00800000);
+  status |= test__addsf3(0x00800000, 0x00800000, 0x01000000);
+  status |= test__addsf3(0x00800000, 0x80800000, 0x00000000);
+  status |= test__addsf3(0x00800001, 0x80800000, 0x00000001);
+  status |= test__addsf3(0x00800001, 0x80800002, 0x80000001);
+  status |= test__addsf3(0x00ffffff, 0x81000000, 0x80000001);
+  status |= test__addsf3(0x00ffffff, 0x81000002, 0x80000005);
+  status |= test__addsf3(0x00ffffff, 0x81000004, 0x80000009);
+  status |= test__addsf3(0x01000000, 0x80ffffff, 0x00000001);
+  status |= test__addsf3(0x01000001, 0x80800001, 0x00800001);
+  status |= test__addsf3(0x01000001, 0x80ffffff, 0x00000003);
+  status |= test__addsf3(0x01000002, 0x80800001, 0x00800003);
+  status |= test__addsf3(0x017fffff, 0x81800000, 0x80000002);
+  status |= test__addsf3(0x01800000, 0x817fffff, 0x00000002);
+  status |= test__addsf3(0x01800001, 0x817fffff, 0x00000006);
+  status |= test__addsf3(0x01800002, 0x81000003, 0x01000001);
+  status |= test__addsf3(0x3f7fffff, 0x80000001, 0x3f7fffff);
+  status |= test__addsf3(0x3f800000, 0x3f800000, 0x40000000);
+  status |= test__addsf3(0x3f800000, 0x3f800003, 0x40000002);
+  status |= test__addsf3(0x3f800000, 0x40000000, 0x40400000);
+  status |= test__addsf3(0x3f800000, 0x40e00000, 0x41000000);
+  status |= test__addsf3(0x3f800000, 0x80000000, 0x3f800000);
+  status |= test__addsf3(0x3f800000, 0xbf800000, 0x00000000);
+  status |= test__addsf3(0x3f800001, 0x3f800000, 0x40000000);
+  status |= test__addsf3(0x3f800001, 0xbf800000, 0x34000000);
+  status |= test__addsf3(0x3f800001, 0xbf800002, 0xb4000000);
+  status |= test__addsf3(0x3ffffffc, 0xbffffffd, 0xb4000000);
+  status |= test__addsf3(0x3fffffff, 0xc0000000, 0xb4000000);
+  status |= test__addsf3(0x40000000, 0x34000000, 0x40000000);
+  status |= test__addsf3(0x40000000, 0x3f800000, 0x40400000);
+  status |= test__addsf3(0x40000000, 0x40000000, 0x40800000);
+  status |= test__addsf3(0x40000000, 0x40000001, 0x40800000);
+  status |= test__addsf3(0x40000000, 0xbfffffff, 0x34000000);
+  status |= test__addsf3(0x40000000, 0xc0000000, 0x00000000);
+  status |= test__addsf3(0x40000000, 0xc0000001, 0xb4800000);
+  status |= test__addsf3(0x40000000, 0xc0a00000, 0xc0400000);
+  status |= test__addsf3(0x40000001, 0x34000000, 0x40000002);
+  status |= test__addsf3(0x40000001, 0x40000002, 0x40800002);
+  status |= test__addsf3(0x40000001, 0xbf800001, 0x3f800001);
+  status |= test__addsf3(0x40000002, 0xbf800001, 0x3f800003);
+  status |= test__addsf3(0x40000002, 0xbf800003, 0x3f800001);
+  status |= test__addsf3(0x40000004, 0xc0000003, 0x34800000);
+  status |= test__addsf3(0x40400000, 0x40400000, 0x40c00000);
+  status |= test__addsf3(0x407fffff, 0x33ffffff, 0x407fffff);
+  status |= test__addsf3(0x407fffff, 0x34000000, 0x40800000);
+  status |= test__addsf3(0x407fffff, 0xc07ffffe, 0x34800000);
+  status |= test__addsf3(0x407fffff, 0xc0800002, 0xb5a00000);
+  status |= test__addsf3(0x40800001, 0xc07fffff, 0x35400000);
+  status |= test__addsf3(0x40a00000, 0x00000000, 0x40a00000);
+  status |= test__addsf3(0x40a00000, 0x80000000, 0x40a00000);
+  status |= test__addsf3(0x40a00000, 0xbf800000, 0x40800000);
+  status |= test__addsf3(0x40a00000, 0xc0a00000, 0x00000000);
+  status |= test__addsf3(0x7d800001, 0xfd7fffff, 0x72400000);
+  status |= test__addsf3(0x7e7fffff, 0xfe7ffffe, 0x72800000);
+  status |= test__addsf3(0x7e7fffff, 0xfe800002, 0xf3a00000);
+  status |= test__addsf3(0x7e800000, 0x7e800000, 0x7f000000);
+  status |= test__addsf3(0x7e800000, 0xfe7fffff, 0x72800000);
+  status |= test__addsf3(0x7e800000, 0xfe800001, 0xf3000000);
+  status |= test__addsf3(0x7e800001, 0x7e800000, 0x7f000000);
+  status |= test__addsf3(0x7e800001, 0xff000001, 0xfe800001);
+  status |= test__addsf3(0x7e800002, 0xfe000003, 0x7e000001);
+  status |= test__addsf3(0x7e800004, 0xfe800003, 0x73000000);
+  status |= test__addsf3(0x7efffffe, 0x7efffffe, 0x7f7ffffe);
+  status |= test__addsf3(0x7efffffe, 0x7effffff, 0x7f7ffffe);
+  status |= test__addsf3(0x7effffff, 0x3f800000, 0x7effffff);
+  status |= test__addsf3(0x7effffff, 0x7f000000, 0x7f800000);
+  status |= test__addsf3(0x7effffff, 0xbf800000, 0x7effffff);
+  status |= test__addsf3(0x7effffff, 0xff000000, 0xf3000000);
+  status |= test__addsf3(0x7f000000, 0x3f800000, 0x7f000000);
+  status |= test__addsf3(0x7f000000, 0x7f000000, 0x7f800000);
+  status |= test__addsf3(0x7f000000, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0x7f000000, 0xbf800000, 0x7f000000);
+  status |= test__addsf3(0x7f000000, 0xff000000, 0x00000000);
+  status |= test__addsf3(0x7f000000, 0xff800000, 0xff800000);
+  status |= test__addsf3(0x7f000001, 0x7f000000, 0x7f800000);
+  status |= test__addsf3(0x7f000001, 0xff000000, 0x73800000);
+  status |= test__addsf3(0x7f000001, 0xff000002, 0xf3800000);
+  status |= test__addsf3(0x7f000002, 0xfe800001, 0x7e800003);
+  status |= test__addsf3(0x7f7ffffe, 0x3f800000, 0x7f7ffffe);
+  status |= test__addsf3(0x7f7ffffe, 0x7f7ffffe, 0x7f800000);
+  status |= test__addsf3(0x7f7ffffe, 0x7f7fffff, 0x7f800000);
+  status |= test__addsf3(0x7f7ffffe, 0xbf800000, 0x7f7ffffe);
+  status |= test__addsf3(0x7f7ffffe, 0xff7fffff, 0xf3800000);
+  status |= test__addsf3(0x7f7fffff, 0x3f800000, 0x7f7fffff);
+  status |= test__addsf3(0x7f7fffff, 0x80000001, 0x7f7fffff);
+  status |= test__addsf3(0x7f7fffff, 0xbf800000, 0x7f7fffff);
+  status |= test__addsf3(0x7f7fffff, 0xff7fffff, 0x00000000);
+  status |= test__addsf3(0x7f800000, 0x00000000, 0x7f800000);
+  status |= test__addsf3(0x7f800000, 0x007fffff, 0x7f800000);
+  status |= test__addsf3(0x7f800000, 0x7f000000, 0x7f800000);
+  status |= test__addsf3(0x7f800000, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0x7f800000, 0x80000000, 0x7f800000);
+  status |= test__addsf3(0x7f800000, 0x807fffff, 0x7f800000);
+  status |= test__addsf3(0x7f800000, 0xff000000, 0x7f800000);
+  status |= test__addsf3(0x80000000, 0x00000000, 0x00000000);
+  status |= test__addsf3(0x80000000, 0x007fffff, 0x007fffff);
+  status |= test__addsf3(0x80000000, 0x7f000000, 0x7f000000);
+  status |= test__addsf3(0x80000000, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0x80000000, 0x80000000, 0x80000000);
+  status |= test__addsf3(0x80000000, 0x807fffff, 0x807fffff);
+  status |= test__addsf3(0x80000000, 0x80800000, 0x80800000);
+  status |= test__addsf3(0x80000000, 0xbf800000, 0xbf800000);
+  status |= test__addsf3(0x80000000, 0xff800000, 0xff800000);
+  status |= test__addsf3(0x80000001, 0x00000001, 0x00000000);
+  status |= test__addsf3(0x80000001, 0x80000001, 0x80000002);
+  status |= test__addsf3(0x80000001, 0xbf7fffff, 0xbf7fffff);
+  status |= test__addsf3(0x80000001, 0xbf800000, 0xbf800000);
+  status |= test__addsf3(0x80000001, 0xbffffffe, 0xbffffffe);
+  status |= test__addsf3(0x80000001, 0xbfffffff, 0xbfffffff);
+  status |= test__addsf3(0x80000001, 0xfeffffff, 0xfeffffff);
+  status |= test__addsf3(0x80000001, 0xff000000, 0xff000000);
+  status |= test__addsf3(0x80000001, 0xff7ffffe, 0xff7ffffe);
+  status |= test__addsf3(0x80000001, 0xff7fffff, 0xff7fffff);
+  status |= test__addsf3(0x80000002, 0x00000001, 0x80000001);
+  status |= test__addsf3(0x80000003, 0x00000000, 0x80000003);
+  status |= test__addsf3(0x80000003, 0x00000002, 0x80000001);
+  status |= test__addsf3(0x80000003, 0x40400000, 0x40400000);
+  status |= test__addsf3(0x80000003, 0x7f000000, 0x7f000000);
+  status |= test__addsf3(0x80000003, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0x80000003, 0x80000000, 0x80000003);
+  status |= test__addsf3(0x80000003, 0xff800000, 0xff800000);
+  status |= test__addsf3(0x80000004, 0x80000004, 0x80000008);
+  status |= test__addsf3(0x807ffffd, 0x007ffffe, 0x00000001);
+  status |= test__addsf3(0x807fffff, 0x007ffffe, 0x80000001);
+  status |= test__addsf3(0x807fffff, 0x007fffff, 0x00000000);
+  status |= test__addsf3(0x807fffff, 0x00800000, 0x00000001);
+  status |= test__addsf3(0x807fffff, 0x807fffff, 0x80fffffe);
+  status |= test__addsf3(0x80800000, 0x00000000, 0x80800000);
+  status |= test__addsf3(0x80800000, 0x00800000, 0x00000000);
+  status |= test__addsf3(0x80800001, 0x00800000, 0x80000001);
+  status |= test__addsf3(0x80800001, 0x00800002, 0x00000001);
+  status |= test__addsf3(0x80ffffff, 0x01000000, 0x00000001);
+  status |= test__addsf3(0x80ffffff, 0x01000002, 0x00000005);
+  status |= test__addsf3(0x80ffffff, 0x01000004, 0x00000009);
+  status |= test__addsf3(0x81000000, 0x00ffffff, 0x80000001);
+  status |= test__addsf3(0x81000001, 0x00800001, 0x80800001);
+  status |= test__addsf3(0x81000001, 0x00ffffff, 0x80000003);
+  status |= test__addsf3(0x81000002, 0x00800001, 0x80800003);
+  status |= test__addsf3(0x817fffff, 0x01800000, 0x00000002);
+  status |= test__addsf3(0x81800000, 0x017fffff, 0x80000002);
+  status |= test__addsf3(0x81800001, 0x017fffff, 0x80000006);
+  status |= test__addsf3(0x81800002, 0x01000003, 0x81000001);
+  status |= test__addsf3(0xbf800000, 0x80000000, 0xbf800000);
+  status |= test__addsf3(0xbf800000, 0xbf800003, 0xc0000002);
+  status |= test__addsf3(0xbf800001, 0x3f800000, 0xb4000000);
+  status |= test__addsf3(0xbf800001, 0x3f800002, 0x34000000);
+  status |= test__addsf3(0xbf800001, 0xbf800000, 0xc0000000);
+  status |= test__addsf3(0xbffffffc, 0x3ffffffd, 0x34000000);
+  status |= test__addsf3(0xbfffffff, 0x00000001, 0xbfffffff);
+  status |= test__addsf3(0xbfffffff, 0x40000000, 0x34000000);
+  status |= test__addsf3(0xc0000000, 0x3fffffff, 0xb4000000);
+  status |= test__addsf3(0xc0000000, 0x40000001, 0x34800000);
+  status |= test__addsf3(0xc0000000, 0xc0000001, 0xc0800000);
+  status |= test__addsf3(0xc0000001, 0x3f800001, 0xbf800001);
+  status |= test__addsf3(0xc0000001, 0xc0000002, 0xc0800002);
+  status |= test__addsf3(0xc0000002, 0x3f800001, 0xbf800003);
+  status |= test__addsf3(0xc0000002, 0x3f800003, 0xbf800001);
+  status |= test__addsf3(0xc0000004, 0x40000003, 0xb4800000);
+  status |= test__addsf3(0xc0400000, 0x40400000, 0x00000000);
+  status |= test__addsf3(0xc07fffff, 0x407ffffe, 0xb4800000);
+  status |= test__addsf3(0xc07fffff, 0x40800002, 0x35a00000);
+  status |= test__addsf3(0xc07fffff, 0xb3ffffff, 0xc07fffff);
+  status |= test__addsf3(0xc07fffff, 0xb4000000, 0xc0800000);
+  status |= test__addsf3(0xc0800001, 0x407fffff, 0xb5400000);
+  status |= test__addsf3(0xfd800001, 0x7d7fffff, 0xf2400000);
+  status |= test__addsf3(0xfe7fffff, 0x7e7ffffe, 0xf2800000);
+  status |= test__addsf3(0xfe7fffff, 0x7e800002, 0x73a00000);
+  status |= test__addsf3(0xfe800000, 0x7e7fffff, 0xf2800000);
+  status |= test__addsf3(0xfe800000, 0x7e800001, 0x73000000);
+  status |= test__addsf3(0xfe800001, 0x7f000001, 0x7e800001);
+  status |= test__addsf3(0xfe800001, 0xfe800000, 0xff000000);
+  status |= test__addsf3(0xfe800002, 0x7e000003, 0xfe000001);
+  status |= test__addsf3(0xfe800004, 0x7e800003, 0xf3000000);
+  status |= test__addsf3(0xfefffffe, 0x7efffffe, 0x00000000);
+  status |= test__addsf3(0xfefffffe, 0xfefffffe, 0xff7ffffe);
+  status |= test__addsf3(0xfefffffe, 0xfeffffff, 0xff7ffffe);
+  status |= test__addsf3(0xfeffffff, 0x3f800000, 0xfeffffff);
+  status |= test__addsf3(0xfeffffff, 0x7f000000, 0x73000000);
+  status |= test__addsf3(0xfeffffff, 0xbf800000, 0xfeffffff);
+  status |= test__addsf3(0xfeffffff, 0xff000000, 0xff800000);
+  status |= test__addsf3(0xff000000, 0x00000000, 0xff000000);
+  status |= test__addsf3(0xff000000, 0x3f800000, 0xff000000);
+  status |= test__addsf3(0xff000000, 0x7f800000, 0x7f800000);
+  status |= test__addsf3(0xff000000, 0x80000000, 0xff000000);
+  status |= test__addsf3(0xff000000, 0xbf800000, 0xff000000);
+  status |= test__addsf3(0xff000000, 0xff000000, 0xff800000);
+  status |= test__addsf3(0xff000000, 0xff800000, 0xff800000);
+  status |= test__addsf3(0xff000001, 0x7f000000, 0xf3800000);
+  status |= test__addsf3(0xff000001, 0x7f000002, 0x73800000);
+  status |= test__addsf3(0xff000001, 0xff000000, 0xff800000);
+  status |= test__addsf3(0xff000002, 0x7e800001, 0xfe800003);
+  status |= test__addsf3(0xff7ffffe, 0x3f800000, 0xff7ffffe);
+  status |= test__addsf3(0xff7ffffe, 0x7f7fffff, 0x73800000);
+  status |= test__addsf3(0xff7ffffe, 0xbf800000, 0xff7ffffe);
+  status |= test__addsf3(0xff7ffffe, 0xff7ffffe, 0xff800000);
+  status |= test__addsf3(0xff7ffffe, 0xff7fffff, 0xff800000);
+  status |= test__addsf3(0xff7fffff, 0x00000001, 0xff7fffff);
+  status |= test__addsf3(0xff7fffff, 0x3f800000, 0xff7fffff);
+  status |= test__addsf3(0xff7fffff, 0xbf800000, 0xff7fffff);
+  status |= test__addsf3(0xff800000, 0x00000000, 0xff800000);
+  status |= test__addsf3(0xff800000, 0x007fffff, 0xff800000);
+  status |= test__addsf3(0xff800000, 0x7f000000, 0xff800000);
+  status |= test__addsf3(0xff800000, 0x80000000, 0xff800000);
+  status |= test__addsf3(0xff800000, 0x807fffff, 0xff800000);
+  status |= test__addsf3(0xff800000, 0xff000000, 0xff800000);
+  status |= test__addsf3(0xff800000, 0xff800000, 0xff800000);
+  status |= test__addsf3(0x7f7fffff, 0x74ffffff, 0x7f800000);
+  status |= test__addsf3(0x3f7fffff, 0x34004000, 0x3f800001);
+  status |= test__addsf3(0x3f800001, 0x23800000, 0x3f800001);
+  status |= test__addsf3(0xbbebe66d, 0x3b267c1f, 0xbb98a85e);
+  status |= test__addsf3(0x01f5b166, 0x81339a37, 0x019be44a);
+
+#if __thumb__ && !__thumb2__
----------------
statham-arm wrote:

I'm not sure it could – these test sources are compiled via lit, not via cmake-generated compile commands, so cmake `add_definitions` or similar wouldn't affect them.

It would probably be easier to set a lit feature name, such as you can query in `REQUIRES:` lines. Then I could move the NaN-policy-specific tests out into separate files conditioned on `librt_has_addsf3_arm_nan` or some such.

https://github.com/llvm/llvm-project/pull/154093


More information about the llvm-commits mailing list