[clang] [llvm] [AArch64] Add soft-float ABI (PR #84146)

Wed Mar 6 02:04:47 PST 2024

https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/84146

>From 96416238cb4d5c197aab4fed96af5161ebff7198 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 1 Dec 2023 10:06:57 +0000
Subject: [PATCH 1/7] [AArch64] Split feature tests for FP and SIMD

AArch64TargetInfo defaults to having the FP feature enabled, but this
function was ignoring that and checking for SIMD instructions instead.

This won't affect most users, because the driver explicitly enables or
disables fp-armv8, which gets handled by
AArch64TargetInfo::handleTargetFeatures to turn FP and SIMD on or off.
However, it will make testing future patches easier, and allow testing
for the presence of FP registers/instructions in CC1 tests.

Change-Id: I2d2b3569dca5fa1dc40c5c6d1dabf7741b8c480e
---
 clang/lib/Basic/Targets/AArch64.cpp             |  3 ++-
 clang/test/CodeGen/attr-target-clones-aarch64.c | 10 ----------
 2 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index 5abb060073c517..a6400fbb8cc497 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -680,7 +680,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Cases("aarch64", "arm64", "arm", true)
       .Case("fmv", HasFMV)
-      .Cases("neon", "fp", "simd", FPU & NeonMode)
+      .Case("fp", FPU & FPUMode)
+      .Cases("neon", "simd", FPU & NeonMode)
       .Case("jscvt", HasJSCVT)
       .Case("fcma", HasFCMA)
       .Case("rng", HasRandGen)
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 276a7b87b7a1b4..94095f9aa3e1f4 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -23,8 +23,6 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 
 
 
-
-
 //.
 // CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
 // CHECK: @ftc.ifunc = weak_odr alias i32 (), ptr @ftc
@@ -177,15 +175,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK:       resolver_return:
 // CHECK-NEXT:    ret ptr @ftc_dup2._McrcMdotprod
 // CHECK:       resolver_else:
-// CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 256
-// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 256
-// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
-// CHECK-NEXT:    br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
-// CHECK:       resolver_return1:
 // CHECK-NEXT:    ret ptr @ftc_dup2._Mfp
-// CHECK:       resolver_else2:
-// CHECK-NEXT:    ret ptr @ftc_dup2.default
 //
 //
 // CHECK: Function Attrs: noinline nounwind optnone

>From 74b560acfd5f2c79e7bc9bc852ada2a988928a7c Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 23 Nov 2023 11:29:28 +0000
Subject: [PATCH 2/7] [AArch64] Add the -mabi=aapcs-soft option for AArch64

This patch just adds the ABI enum, command-line options and some error
checking. Future patches will emit ABI-compliant code and add more
error checking for individual function ABIs.

Command-line option behaviour:
* The default is unchanged ("aapcs", except for targets where it already
  defaults to "darwinpcs"), independent of whether the target has an FPU
* If -mabi=aapcs-soft is used and the target has an FPU, the driver
  reports an error, to prevent having two incompatible ABIs for one
  target.
* If a hard-float ABI is requested for a target without an FPU, this is
  accepted by the driver. Later patches will make functions with
  floating-point arguments or return types invalid for this combination,
  matching GCC's behaviour.
---
 .../clang/Basic/DiagnosticCommonKinds.td      |   2 +
 clang/lib/Basic/Targets/AArch64.cpp           |  13 +-
 clang/lib/Basic/Targets/AArch64.h             |   2 +
 clang/lib/CodeGen/CodeGenModule.cpp           |   2 +
 clang/lib/CodeGen/TargetInfo.h                |   1 +
 clang/test/CodeGen/aarch64-soft-float-abi.c   |  18 ++
 clang/test/Driver/aarch64-soft-float-abi.c    |  26 +++
 .../Preprocessor/aarch64-target-features.c    |   6 +-
 llvm/test/CodeGen/AArch64/soft-float-abi.ll   | 161 ++++++++++++++++++
 9 files changed, 227 insertions(+), 4 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-soft-float-abi.c
 create mode 100644 clang/test/Driver/aarch64-soft-float-abi.c
 create mode 100644 llvm/test/CodeGen/AArch64/soft-float-abi.ll

diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 08bb1d81ba29f1..43e132e5665850 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -356,6 +356,8 @@ def warn_target_unrecognized_env : Warning<
 def warn_knl_knm_isa_support_removed : Warning<
   "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.">,
   InGroup<DiagGroup<"knl-knm-isa-support-removed">>;
+def err_target_unsupported_abi_with_fpu : Error<
+  "'%0' ABI is not supported with FPU">;
 
 // Source manager
 def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index a6400fbb8cc497..4f337b4e366f88 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
@@ -199,13 +200,23 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
 StringRef AArch64TargetInfo::getABI() const { return ABI; }
 
 bool AArch64TargetInfo::setABI(const std::string &Name) {
-  if (Name != "aapcs" && Name != "darwinpcs")
+  if (Name != "aapcs" && Name != "aapcs-soft" && Name != "darwinpcs")
     return false;
 
   ABI = Name;
   return true;
 }
 
+bool AArch64TargetInfo::validateTarget(DiagnosticsEngine &Diags) const {
+  if (hasFeature("fp") && ABI == "aapcs-soft") {
+    // aapcs-soft is not allowed for targets with an FPU, to avoid there being
+    // two incompatible ABIs.
+    Diags.Report(diag::err_target_unsupported_abi_with_fpu) << ABI;
+    return false;
+  }
+  return true;
+}
+
 bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef,
                                                  BranchProtectionInfo &BPI,
                                                  StringRef &Err) const {
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index c1ba156860a122..031b62bb6373f3 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -199,6 +199,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool hasInt128Type() const override;
 
   bool hasBitIntType() const override { return true; }
+
+  bool validateTarget(DiagnosticsEngine &Diags) const override;
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 82a97ecfaa0078..99a07878a46ed9 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -145,6 +145,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
       Kind = AArch64ABIKind::DarwinPCS;
     else if (Triple.isOSWindows())
       return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
+    else if (Target.getABI() == "aapcs-soft")
+      Kind = AArch64ABIKind::AAPCSSoft;
 
     return createAArch64TargetCodeGenInfo(CGM, Kind);
   }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 7682f197041c74..0571e828bb1d4a 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -416,6 +416,7 @@ enum class AArch64ABIKind {
   AAPCS = 0,
   DarwinPCS,
   Win64,
+  AAPCSSoft,
 };
 
 std::unique_ptr<TargetCodeGenInfo>
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..4b8de73daaa923
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -target-abi aapcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+
+// See also llvm/test/CodeGen/AArch64/soft-float-abi.ll, which checks the LLVM
+// backend parts of the soft-float ABI.
+
+// Floats are passed in integer registers, this will be handled by the backend.
+// CHECK: define dso_local half @test0(half noundef %a)
+// CHECK: define dso_local bfloat @test1(bfloat noundef %a)
+// CHECK: define dso_local float @test2(float noundef %a)
+// CHECK: define dso_local double @test3(double noundef %a)
+// CHECK: define dso_local fp128 @test4(fp128 noundef %a)
+__fp16 test0(__fp16 a) { return a; }
+__bf16 test1(__bf16 a) { return a; }
+float test2(float a) { return a; }
+double test3(double a) { return a; }
+long double test4(long double a) { return a; }
+
diff --git a/clang/test/Driver/aarch64-soft-float-abi.c b/clang/test/Driver/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..0486d94e66072e
--- /dev/null
+++ b/clang/test/Driver/aarch64-soft-float-abi.c
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target
+
+// Hard-float, valid
+// RUN: %clang --target=aarch64-none-elf                               -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf                   -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r                -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r    -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+fp -mabi=aapcs -c %s -o /dev/null
+
+// Soft-float, no FP
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp -mabi=aapcs-soft -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -mgeneral-regs-only -mabi=aapcs-soft -c %s -o /dev/null
+
+// Soft-float, FP hardware: Rejected, to avoid having two incompatible ABIs for common targets.
+// RUN: not %clang --target=aarch64-none-elf                        -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+// RUN: not %clang --target=aarch64-none-elf -march=armv8-r+fp      -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+// RUN: not %clang --target=aarch64-none-elf -march=armv8-r+nofp+fp -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+
+// No FP, hard-float. This is accepted by the driver, but functions which
+// require arguments or returns to be passed in FP registers will be rejected
+// (tested elsewhere).
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp             -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -mgeneral-regs-only -mabi=aapcs -c %s -o /dev/null
+
+// INVALID-SOFT: error: 'aapcs-soft' ABI is not supported with FPU
diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 6ec4dcd60cf601..9f8a8bdeeb9cb0 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -342,15 +342,15 @@
 
 // RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-1 %s
 // RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto+fp+simd+crc+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-1 %s
-// RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
-// RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto+nofp+nosimd+nocrc+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
+// RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto -mabi=aapcs-soft -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
+// RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto+nofp+nosimd+nocrc+nocrypto -mabi=aapcs-soft -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
 // RUN: %clang -target aarch64 -march=armv8-a+nosimd -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-3 %s
 // CHECK-MARCH-1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+fp-armv8" "-target-feature" "+sha2" "-target-feature" "+neon"
 // CHECK-MARCH-2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-fp-armv8"{{.*}} "-target-feature" "-neon"
 // CHECK-MARCH-3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-neon"
 
 // While we're checking +nofp, also make sure it stops defining __ARM_FP
-// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -mabi=aapcs-soft -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s
 // CHECK-NOFP-NOT: #define __ARM_FP{{ }}
 
 // Check +sm4:
diff --git a/llvm/test/CodeGen/AArch64/soft-float-abi.ll b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
new file mode 100644
index 00000000000000..291c3875c2488d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple aarch64-none-eabi < %s -mattr=-fp-armv8 | FileCheck %s
+
+; See also clang/test/CodeGen/aarch64-soft-float-abi.c, which tests the clang
+; parts of the soft-float ABI.
+
+; FP types up to 64-bit are passed in a general purpose register.
+define half @test0(half %a, half %b)  {
+; CHECK-LABEL: test0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret half %b
+}
+
+define bfloat @test1(i32 %a, bfloat %b) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret bfloat %b
+}
+
+define float @test2(i64 %a, float %b) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret float %b
+}
+
+define double @test3(half %a, double %b) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+entry:
+  ret double %b
+}
+
+; fp128 is passed in a pair of GPRs.
+define fp128 @test4(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; fp128 is passed in an aligned pair of GPRs, leaving one register unused is
+; necessary.
+define fp128 @test5(float %a, fp128 %b) {
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; If the alignment of an fp128 leaves a register unused, it remains unused even
+; if a later argument could fit in it.
+define i64 @test6(i64 %a, fp128 %b, i64 %c) {
+; CHECK-LABEL: test6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x4
+; CHECK-NEXT:    ret
+entry:
+  ret i64 %c
+}
+
+; HFAs are all bit-casted to integer types in the frontend when using the
+; soft-float ABI, so they get passed in the same way as non-homogeneous
+; aggregates. The IR is identical to the equivalent integer types, so nothing
+; to test here.
+
+; The PCS for vector and HVA types is not defined by the soft-float ABI because
+; these types are only defined by the ACLE when vector hardware is available,
+; so nothing to test here.
+
+; The front-end generates IR for va_arg which always reads from the integer
+; register save area, and never the floating-point register save area. The
+; layout of the va_list type remains the same, the floating-point related
+; fields are unused. The only change needed in the backend is  in va_start, to
+; not attempt to save the floating-point registers or set the FP fields in the
+; va_list.
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+declare void @llvm.va_start(ptr)
+define double @test20(i32 %a, ...) {
+; CHECK-LABEL: test20:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    mov w8, #-56 // =0xffffffc8
+; CHECK-NEXT:    add x10, sp, #8
+; CHECK-NEXT:    add x9, sp, #96
+; CHECK-NEXT:    str x8, [sp, #88]
+; CHECK-NEXT:    add x10, x10, #56
+; CHECK-NEXT:    ldrsw x8, [sp, #88]
+; CHECK-NEXT:    stp x1, x2, [sp, #8]
+; CHECK-NEXT:    stp x3, x4, [sp, #24]
+; CHECK-NEXT:    stp x5, x6, [sp, #40]
+; CHECK-NEXT:    stp x7, x9, [sp, #56]
+; CHECK-NEXT:    str x10, [sp, #72]
+; CHECK-NEXT:    tbz w8, #31, .LBB7_3
+; CHECK-NEXT:  // %bb.1: // %vaarg.maybe_reg
+; CHECK-NEXT:    add w9, w8, #8
+; CHECK-NEXT:    cmn w8, #8
+; CHECK-NEXT:    str w9, [sp, #88]
+; CHECK-NEXT:    b.gt .LBB7_3
+; CHECK-NEXT:  // %bb.2: // %vaarg.in_reg
+; CHECK-NEXT:    ldr x9, [sp, #72]
+; CHECK-NEXT:    add x8, x9, x8
+; CHECK-NEXT:    b .LBB7_4
+; CHECK-NEXT:  .LBB7_3: // %vaarg.on_stack
+; CHECK-NEXT:    ldr x8, [sp, #64]
+; CHECK-NEXT:    add x9, x8, #8
+; CHECK-NEXT:    str x9, [sp, #64]
+; CHECK-NEXT:  .LBB7_4: // %vaarg.end
+; CHECK-NEXT:    ldr x0, [x8]
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
+entry:
+  %vl = alloca %struct.__va_list, align 8
+  call void @llvm.va_start(ptr nonnull %vl)
+  %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 3
+  %gr_offs = load i32, ptr %gr_offs_p, align 8
+  %0 = icmp sgt i32 %gr_offs, -1
+  br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %entry
+  %new_reg_offs = add nsw i32 %gr_offs, 8
+  store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+  %inreg = icmp slt i32 %gr_offs, -7
+  br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 1
+  %reg_top = load ptr, ptr %reg_top_p, align 8
+  %1 = sext i32 %gr_offs to i64
+  %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
+  %stack = load ptr, ptr %vl, align 8
+  %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+  store ptr %new_stack, ptr %vl, align 8
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+  %3 = load double, ptr %vaargs.addr, align 8
+  ret double %3
+}
+

>From 5fd2409ced0ff3e3376398ffadf316c9b50610a2 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 23 Nov 2023 11:31:52 +0000
Subject: [PATCH 3/7] [AArch64] Disable HFAs/HVAs for soft-float

If we can't pass them in FP registers, then homogeneous floating-point
and vector aggregates should be treated like any other composite type,
and passed either in registers or on the stack.

Change-Id: Icd56e122ad586462d6059069f923ffca4b32a8d2
---
 clang/lib/CodeGen/Targets/AArch64.cpp       |  5 +++++
 clang/test/CodeGen/aarch64-soft-float-abi.c | 24 +++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 725e8a70fddfe6..93424ab9f00704 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -494,6 +494,11 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
 }
 
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // For the soft-float ABI variant, no types are considered to be homogeneous
+  // aggregates.
+  if (Kind == AArch64ABIKind::AAPCSSoft)
+    return false;
+
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
   // point type or a short-vector type. This is the same as the 32-bit ABI,
   // but with the difference that any floating-point type is allowed,
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
index 4b8de73daaa923..cbfcccb7c593c1 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -target-abi aapcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -target-abi aapcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
 
 // See also llvm/test/CodeGen/AArch64/soft-float-abi.ll, which checks the LLVM
 // backend parts of the soft-float ABI.
@@ -16,3 +16,23 @@ float test2(float a) { return a; }
 double test3(double a) { return a; }
 long double test4(long double a) { return a; }
 
+// No types are considered to be HFAs or HVAs by the soft-float PCS, so these
+// are converted to integer types.
+struct A {
+  float x;
+};
+// SOFT: define dso_local i32 @test10(i64 %a.coerce)
+// HARD: define dso_local %struct.A @test10([1 x float] alignstack(8) %a.coerce)
+struct A test10(struct A a) { return a; }
+
+struct B {
+  double x;
+  double y;
+};
+// SOFT: define dso_local [2 x i64] @test11([2 x i64] %a.coerce)
+// HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
+struct B test11(struct B a) { return a; }
+
+// Vector types are only available for targets with the correct hardware, and
+// their calling-convention is left undefined by the soft-float ABI, so they
+// aren't tested here.

>From d842ccc106a93c8edb96a5411ba228f7bbab85eb Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 23 Nov 2023 11:31:28 +0000
Subject: [PATCH 4/7] [AArch64] Fix va_arg for soft-float

The AArch64 back-end already avoids saving the FP registers to the
va_list when the FP registers aren't present, but clang also needs to
know not to load them from the FP register save area when generating
code for va_arg.

The layout of va_list remains the same, but the vr_top and vr_offs
fields are unused.

Change-Id: I5d3dee1ac4a29f189432957910662939b79d9329
---
 clang/lib/CodeGen/Targets/AArch64.cpp       | 12 +++++++-----
 clang/test/CodeGen/aarch64-soft-float-abi.c | 20 ++++++++++++++++++++
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 93424ab9f00704..4164ab0c4fe235 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -55,8 +55,8 @@ class AArch64ABIInfo : public ABIInfo {
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                           CodeGenFunction &CGF) const;
 
-  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const;
+  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
+                         AArch64ABIKind Kind) const;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override {
@@ -67,7 +67,7 @@ class AArch64ABIInfo : public ABIInfo {
 
     return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
            : isDarwinPCS()               ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
-                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind);
   }
 
   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -530,7 +530,8 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
 }
 
 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
+                                       CodeGenFunction &CGF,
+                                       AArch64ABIKind Kind) const {
   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
                                        CGF.CurFnInfo->getCallingConvention());
   // Empty records are ignored for parameter passing purposes.
@@ -555,7 +556,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+  bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
+               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
index cbfcccb7c593c1..143377e218a197 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -4,6 +4,9 @@
 // See also llvm/test/CodeGen/AArch64/soft-float-abi.ll, which checks the LLVM
 // backend parts of the soft-float ABI.
 
+// The va_list type does not change between the ABIs
+// CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
 // Floats are passed in integer registers, this will be handled by the backend.
 // CHECK: define dso_local half @test0(half noundef %a)
 // CHECK: define dso_local bfloat @test1(bfloat noundef %a)
@@ -33,6 +36,23 @@ struct B {
 // HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
 struct B test11(struct B a) { return a; }
 
+#include <stdarg.h>
+
+// The layout of the va_list struct is unchanged between the ABIs, but for
+// aapcs-soft, floating-point arguments will be retrieved from the GPR save
+// area, as if they were an integer type of the same size.
+// CHECK-LABEL: define dso_local double @test20(i32 noundef %a, ...)
+// CHECK: %vl = alloca %struct.__va_list, align 8
+// SOFT: %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 3
+// SOFT: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 1
+// HARD: %vr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 4
+// HARD: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 2
+double test20(int a, ...) {
+  va_list vl;
+  va_start(vl, a);
+  return va_arg(vl, double);
+}
+
 // Vector types are only available for targets with the correct hardware, and
 // their calling-convention is left undefined by the soft-float ABI, so they
 // aren't tested here.

>From 1f10ed32d2e8173bb80dd67615f0cd300d6c0a16 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 28 Feb 2024 09:27:15 +0000
Subject: [PATCH 5/7] [Clang][Codegen] Continue codegen after recoverable
 errors

Some ABI checks must be done during codegen, and can result in errors
being generated. Previously, clang was stopping codegen after the first
function which emitted an error, so later errors in the file would not
be emitted.

This marks the errors used for ABI checks as recoverable, and changes
ModuleBuilder to only bail out if an unrecoverable error is emitted.
---
 .../clang/Basic/DiagnosticFrontendKinds.td    |  4 ++
 clang/include/clang/Basic/DiagnosticIDs.h     |  3 ++
 clang/lib/Basic/DiagnosticIDs.cpp             |  8 ++++
 clang/lib/CodeGen/ModuleBuilder.cpp           | 16 +++----
 clang/test/CodeGen/target-avx-abi-diag.c      | 42 ++++++++++---------
 5 files changed, 46 insertions(+), 27 deletions(-)

diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index dcd2c19fb7ee36..231646452e730c 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -279,6 +279,8 @@ def err_builtin_needs_feature : Error<"%0 needs target feature %1">;
 def err_function_needs_feature : Error<
   "always_inline function %1 requires target feature '%2', but would "
   "be inlined into function %0 that is compiled without support for '%2'">;
+
+let CategoryName = "Codegen ABI Check" in {
 def err_function_always_inline_attribute_mismatch : Error<
   "always_inline function %1 and its caller %0 have mismatching %2 attributes">;
 def err_function_always_inline_new_za : Error<
@@ -290,6 +292,8 @@ def warn_avx_calling_convention
       InGroup<DiagGroup<"psabi">>;
 def err_avx_calling_convention : Error<warn_avx_calling_convention.Summary>;
 
+}
+
 def err_alias_to_undefined : Error<
   "%select{alias|ifunc}0 must point to a defined "
   "%select{variable or |}1function">;
diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index 0cdda42793f6f0..3e7577a7d90f5d 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -276,6 +276,9 @@ class DiagnosticIDs : public RefCountedBase<DiagnosticIDs> {
   /// category.
   static bool isARCDiagnostic(unsigned DiagID);
 
+  /// Return true if a given diagnostic is a codegen-time ABI check.
+  static bool isCodegenABICheckDiagnostic(unsigned DiagID);
+
   /// Enumeration describing how the emission of a diagnostic should
   /// be treated when it occurs during C++ template argument deduction.
   enum SFINAEResponse {
diff --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index b353a6627f298b..281f719c7ad970 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -855,6 +855,9 @@ bool DiagnosticIDs::isUnrecoverable(unsigned DiagID) const {
   if (isARCDiagnostic(DiagID))
     return false;
 
+  if (isCodegenABICheckDiagnostic(DiagID))
+    return false;
+
   return true;
 }
 
@@ -862,3 +865,8 @@ bool DiagnosticIDs::isARCDiagnostic(unsigned DiagID) {
   unsigned cat = getCategoryNumberForDiag(DiagID);
   return DiagnosticIDs::getCategoryNameFromID(cat).starts_with("ARC ");
 }
+
+bool DiagnosticIDs::isCodegenABICheckDiagnostic(unsigned DiagID) {
+  unsigned cat = getCategoryNumberForDiag(DiagID);
+  return DiagnosticIDs::getCategoryNameFromID(cat) == "Codegen ABI Check";
+}
diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index 3594f4c66e6774..df85295cfb2e29 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -180,7 +180,7 @@ namespace {
 
     bool HandleTopLevelDecl(DeclGroupRef DG) override {
       // FIXME: Why not return false and abort parsing?
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return true;
 
       HandlingTopLevelDeclRAII HandlingDecl(*this);
@@ -206,7 +206,7 @@ namespace {
     }
 
     void HandleInlineFunctionDefinition(FunctionDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       assert(D->doesThisDeclarationHaveABody());
@@ -233,7 +233,7 @@ namespace {
     /// client hack on the type, which can occur at any point in the file
     /// (because these can be defined in declspecs).
     void HandleTagDeclDefinition(TagDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       // Don't allow re-entrant calls to CodeGen triggered by PCH
@@ -269,7 +269,7 @@ namespace {
     }
 
     void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       // Don't allow re-entrant calls to CodeGen triggered by PCH
@@ -283,7 +283,7 @@ namespace {
 
     void HandleTranslationUnit(ASTContext &Ctx) override {
       // Release the Builder when there is no error.
-      if (!Diags.hasErrorOccurred() && Builder)
+      if (!Diags.hasUnrecoverableErrorOccurred() && Builder)
         Builder->Release();
 
       // If there are errors before or when releasing the Builder, reset
@@ -297,14 +297,14 @@ namespace {
     }
 
     void AssignInheritanceModel(CXXRecordDecl *RD) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       Builder->RefreshTypeCacheForClass(RD);
     }
 
     void CompleteTentativeDefinition(VarDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       Builder->EmitTentativeDefinition(D);
@@ -315,7 +315,7 @@ namespace {
     }
 
     void HandleVTable(CXXRecordDecl *RD) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       Builder->EmitVTable(RD);
diff --git a/clang/test/CodeGen/target-avx-abi-diag.c b/clang/test/CodeGen/target-avx-abi-diag.c
index 84be9e252db5cc..dfbbc3213ca6b2 100644
--- a/clang/test/CodeGen/target-avx-abi-diag.c
+++ b/clang/test/CodeGen/target-avx-abi-diag.c
@@ -2,13 +2,9 @@
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx -verify=no512 -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -verify=both -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature +evex512 -verify=both -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=1 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=2 -DNOEVEX512 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=3 -DNOEVEX512 -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DNOEVEX512 -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-512 -verify=both -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -DAVX512_ERR=1 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -DAVX512_ERR=2 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -DAVX512_ERR=3 -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -o - -S
 // REQUIRES: x86-registered-target
 
 // both-no-diagnostics
@@ -25,7 +21,6 @@ void takesAvx512_no_target(avx512fType t);
 void variadic(int i, ...);
 __attribute__((target("avx512f"))) void variadic_err(int i, ...);
 
-#if !defined(AVX512_ERR) || AVX512_ERR == 1
 // If neither side has an attribute, warn.
 void call_warn(void) {
   avx256Type t1;
@@ -39,9 +34,7 @@ void call_warn(void) {
   // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   variadic(3, t2); // no512-warning {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 }
-#endif
 
-#if !defined(AVX512_ERR) || AVX512_ERR == 2
 // If only 1 side has an attribute, error.
 void call_errors(void) {
   avx256Type t1;
@@ -54,16 +47,21 @@ void call_errors(void) {
   // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   variadic_err(3, t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 }
-#if defined(__AVX10_1__)
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
-#endif
 
-#if !defined(AVX512_ERR) || AVX512_ERR == 3
+// Check that these errors are treated as non-fatal, so we can report them for
+// all functions, not just the first.
+void call_errors_2(void) {
+  avx256Type t1;
+  takesAvx256(t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
+  avx512fType t2;
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
+  takesAvx512(t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
+
+  variadic_err(1, t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
+  variadic_err(3, t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
+}
+
 __attribute__((target("avx"))) void call_avx256_ok(void) {
   avx256Type t;
   takesAvx256(t);
@@ -93,5 +91,11 @@ __attribute__((target("avx512f"))) void call_avx512_ok2(void) {
 // avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
 // avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
 // avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
 #endif

>From 5535c5daa0a09e4e62d3fba61f56be8c7bad8c89 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 28 Feb 2024 09:29:54 +0000
Subject: [PATCH 6/7] [Clang][AArch64] Check AArch64 float ABI during codegen

When using the hard-float ABI (the default) on an AArch64 target without
floating-point instructions or registers, we cannot correctly compile
any functions which require passing arguments or return values in
floating-point registers, so we need to report an error.

GCC does this check after optimisations have happened, so it's not
possible to do it in clang in a perfectly compatible way. Instead, this
patch does the check in clang codegen, so that we can ignore unused
inline functions.

GCC also errors on non-trivial uses of floating-point values outside of
function arguments/returns, but again does this after optimisation, so
there are no simple rules for what should and should not be allowed.
However, this code does not have ABI implications, so we can allow most
uses of floating-point values where GCC does not.

I've tested this by building the Linux kernel, in which the previous
version of this patch (doing the check in Sema) caused a lot of errors.
---
 .../clang/Basic/DiagnosticFrontendKinds.td    |  2 +
 clang/lib/CodeGen/CodeGenFunction.cpp         |  2 +
 clang/lib/CodeGen/TargetInfo.h                |  5 +
 clang/lib/CodeGen/Targets/AArch64.cpp         | 32 ++++++
 .../CodeGen/aarch64-soft-float-abi-errors.c   | 99 +++++++++++++++++++
 clang/test/Sema/arm-vector-types-support.c    |  2 +
 6 files changed, 142 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-soft-float-abi-errors.c

diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 231646452e730c..cdccea98b31f09 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -292,6 +292,8 @@ def warn_avx_calling_convention
       InGroup<DiagGroup<"psabi">>;
 def err_avx_calling_convention : Error<warn_avx_calling_convention.Summary>;
 
+def err_target_unsupported_type_for_abi
+    : Error<"%0 requires %1 type support, but ABI '%2' does not support it">;
 }
 
 def err_alias_to_undefined : Error<
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index b87fc86f4e635f..6d7bb823908431 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1360,6 +1360,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   FunctionArgList Args;
   QualType ResTy = BuildFunctionArgList(GD, Args);
 
+  CGM.getTargetCodeGenInfo().checkFunctionABI(CGM, FD);
+
   if (FD->isInlineBuiltinDeclaration()) {
     // When generating code for a builtin with an inline declaration, use a
     // mangled name to hold the actual body, while keeping an external
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 0571e828bb1d4a..6893b50a3cfe90 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -84,6 +84,11 @@ class TargetCodeGenInfo {
   /// Provides a convenient hook to handle extra target-specific globals.
   virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
 
+  /// Any further codegen related checks that need to be done on a function
+  /// signature in a target specific manner.
+  virtual void checkFunctionABI(CodeGenModule &CGM,
+                                const FunctionDecl *Decl) const {}
+
   /// Any further codegen related checks that need to be done on a function call
   /// in a target specific manner.
   virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 4164ab0c4fe235..2ff067157d034b 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -27,6 +27,8 @@ class AArch64ABIInfo : public ABIInfo {
   AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
       : ABIInfo(CGT), Kind(Kind) {}
 
+  bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
+
 private:
   AArch64ABIKind getABIKind() const { return Kind; }
   bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
@@ -163,6 +165,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
     return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
   }
 
+  void checkFunctionABI(CodeGenModule &CGM,
+                        const FunctionDecl *Decl) const override;
+
   void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                             const FunctionDecl *Caller,
                             const FunctionDecl *Callee,
@@ -848,6 +853,33 @@ static bool isStreamingCompatible(const FunctionDecl *F) {
   return false;
 }
 
+void AArch64TargetCodeGenInfo::checkFunctionABI(
+    CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
+  const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
+  const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
+
+  // If we are using a hard-float ABI, but do not have floating point
+  // registers, then report an error for any function arguments or returns
+  // which would be passed in floating-pint registers.
+  auto CheckType = [&CGM, &TI, &ABIInfo](const QualType &Ty, const NamedDecl *D) {
+    const Type *HABase = nullptr;
+    uint64_t HAMembers = 0;
+    if (Ty->isFloatingType() || Ty->isVectorType() ||
+        ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
+      CGM.getDiags().Report(D->getLocation(),
+                            diag::err_target_unsupported_type_for_abi)
+          << D->getDeclName() << Ty << TI.getABI();
+    }
+  };
+
+  if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
+    CheckType(FuncDecl->getReturnType(), FuncDecl);
+    for (ParmVarDecl *PVD : FuncDecl->parameters()) {
+      CheckType(PVD->getType(), PVD);
+    }
+  }
+}
+
 void AArch64TargetCodeGenInfo::checkFunctionCallABI(
     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
     const FunctionDecl *Callee, const CallArgList &Args) const {
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
new file mode 100644
index 00000000000000..3e5ab9e92a1d8c
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
@@ -0,0 +1,99 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +fp-armv8 -S -target-abi aapcs      -verify=fp-hard %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -target-abi aapcs-soft -verify=nofp-soft %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -target-abi aapcs      -verify=nofp-hard %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -target-abi aapcs -O1  -verify=nofp-hard,nofp-hard-opt -emit-llvm %s
+// No run line needed for soft-float ABI with an FPU because that is rejected by the driver
+
+// With the hard-float ABI and a target with an FPU, FP arguments are passed in
+// FP registers, no diagnostics needed.
+// fp-hard-no-diagnostics
+
+// With the soft-float ABI, FP arguments are passed in integer registers, no
+// diagnostics needed.
+// nofp-soft-no-diagnostics
+
+// With the hard-float ABI but no FPU, FP arguments cannot be passed in an
+// ABI-compatible way, so we report errors for these cases:
+
+struct HFA {
+  float x, y;
+};
+
+struct non_HFA {
+  float x;
+  int y;
+};
+
+// Floating-point arguments and returns are rejected
+void test_fp16_arg(__fp16 a) {}
+// nofp-hard-error at -1 {{'a' requires '__fp16' type support, but ABI 'aapcs' does not support it}}
+__fp16 test_fp16_ret(void) { return 3.141; }
+// nofp-hard-error at -1 {{'test_fp16_ret' requires '__fp16' type support, but ABI 'aapcs' does not support it}}
+void test_float_arg(float a) {}
+// nofp-hard-error at -1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
+float test_float_ret(void) { return 3.141f; }
+// nofp-hard-error at -1 {{'test_float_ret' requires 'float' type support, but ABI 'aapcs' does not support it}}
+void test_double_arg(double a) {}
+// nofp-hard-error at -1 {{'a' requires 'double' type support, but ABI 'aapcs' does not support it}}
+double test_double_ret(void) { return 3.141; }
+// nofp-hard-error at -1 {{'test_double_ret' requires 'double' type support, but ABI 'aapcs' does not support it}}
+void test_long_double_arg(long double a) {}
+// nofp-hard-error at -1 {{'a' requires 'long double' type support, but ABI 'aapcs' does not support it}}
+long double test_long_double_ret(void) { return 3.141L; }
+// nofp-hard-error at -1 {{'test_long_double_ret' requires 'long double' type support, but ABI 'aapcs' does not support it}}
+
+// HFAs would be passed in floating-point registers, so are rejected.
+void test_hfa_arg(struct HFA a) {}
+// nofp-hard-error at -1 {{'a' requires 'struct HFA' type support, but ABI 'aapcs' does not support it}}
+struct HFA test_hfa_ret(void) { return (struct HFA){}; }
+// nofp-hard-error at -1 {{'test_hfa_ret' requires 'struct HFA' type support, but ABI 'aapcs' does not support it}}
+
+// Note: vector types cannot be created at all for targets without an FPU, so
+// it is not possible to create a function which passes/returns them when using
+// either the default or soft-float ABI. This is tested elsewhere.
+
+// This struct contains a floating-point type, but is not an HFA, so can be
+// passed/returned without affecting the ABI.
+struct non_HFA test_non_hfa_ret(void) { return (struct non_HFA){}; }
+void test_non_hfa_arg(struct non_HFA a) {}
+
+// This inline function does not get code-generated because there is no use of
+// it in this file, so we don't emit an error for it, matching GCC's
+// behaviour.
+inline void test_float_arg_inline(float a) {}
+
+// This inline function is used, so we emit the error if we generate code for
+// it. The code isn't generated at -O0, so no error is emitted there.
+inline void test_float_arg_inline_used(float a) {}
+// nofp-hard-opt-error at -1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
+void use_inline() { test_float_arg_inline_used(1.0f); }
+
+// The always_inline attribute causes an inline function to always be
+// code-genned, even at -O0, so we always emit the error.
+__attribute((always_inline))
+inline void test_float_arg_always_inline_used(float a) {}
+// nofp-hard-error at -1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
+void use_always_inline() { test_float_arg_always_inline_used(1.0f); }
+
+// Floating-point expressions, global variables and local variables do not
+// affect the ABI, so are allowed. GCC does reject some uses of floating point
+// types like this, but it does so after optimisation, which we can't
+// accurately match in clang.
+int test_expr_float(int a) { return a + 1.0f; }
+int test_expr_double(int a) { return a + 1.0; }
+
+float global_float = 2.0f * 3.5f;
+float global_double = 2.0 * 3.5;
+
+int test_var_float(int a) {
+  float f = a;
+  f *= 6.0;
+  return (int)f;
+}
+int test_var_double(int a) {
+  double d = a;
+  d *= 6.0;
+  return (int)d;
+}
diff --git a/clang/test/Sema/arm-vector-types-support.c b/clang/test/Sema/arm-vector-types-support.c
index 83a83ddfe78017..ed5f5ba175a94a 100644
--- a/clang/test/Sema/arm-vector-types-support.c
+++ b/clang/test/Sema/arm-vector-types-support.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 %s -triple armv7 -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple aarch64 -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -fsyntax-only -verify
 
 typedef __attribute__((neon_vector_type(2))) int int32x2_t; // expected-error{{'neon_vector_type' attribute is not supported on targets missing 'neon', 'mve', 'sve' or 'sme'; specify an appropriate -march= or -mcpu=}}
 typedef __attribute__((neon_polyvector_type(16))) short poly8x16_t; // expected-error{{'neon_polyvector_type' attribute is not supported on targets missing 'neon' or 'mve'; specify an appropriate -march= or -mcpu=}}

>From 6198693500df9f53c305adda9287b00b6c04850d Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 6 Mar 2024 10:04:28 +0000
Subject: [PATCH 7/7] clang-format

---
 clang/lib/CodeGen/Targets/AArch64.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 2ff067157d034b..e1d49340ff1a8c 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -861,7 +861,8 @@ void AArch64TargetCodeGenInfo::checkFunctionABI(
   // If we are using a hard-float ABI, but do not have floating point
   // registers, then report an error for any function arguments or returns
   // which would be passed in floating-pint registers.
-  auto CheckType = [&CGM, &TI, &ABIInfo](const QualType &Ty, const NamedDecl *D) {
+  auto CheckType = [&CGM, &TI, &ABIInfo](const QualType &Ty,
+                                         const NamedDecl *D) {
     const Type *HABase = nullptr;
     uint64_t HAMembers = 0;
     if (Ty->isFloatingType() || Ty->isVectorType() ||