[clang] ef395a4 - [AArch64] Add soft-float ABI (#84146)

Tue Mar 19 06:58:56 PDT 2024

Author: ostannard
Date: 2024-03-19T13:58:51Z
New Revision: ef395a492aa931f428e99e1c0a93d4ad2fb0fcfa

URL: https://github.com/llvm/llvm-project/commit/ef395a492aa931f428e99e1c0a93d4ad2fb0fcfa
DIFF: https://github.com/llvm/llvm-project/commit/ef395a492aa931f428e99e1c0a93d4ad2fb0fcfa.diff

LOG: [AArch64] Add soft-float ABI (#84146)

This is re-working of #74460, which adds a soft-float ABI for AArch64.
That was reverted because it causes errors when building the linux and
fuchsia kernels.

The problem is that GCC's implementation of the ABI compatibility checks
when using the hard-float ABI on a target without FP registers does it's
checks after optimisation. The previous version of this patch reported
errors for all uses of floating-point types, which is stricter than what
GCC does in practice.

This changes two things compared to the first version:
* Only check the types of function arguments and returns, not the types
of other values. This is more relaxed than GCC, while still guaranteeing
ABI compatibility.
* Move the check from Sema to CodeGen, so that inline functions are only
checked if they are actually used. There are some cases in the linux
kernel which depend on this behaviour of GCC.

Added: 
    clang/test/CodeGen/aarch64-soft-float-abi-errors.c
    clang/test/CodeGen/aarch64-soft-float-abi.c
    clang/test/Driver/aarch64-soft-float-abi.c
    llvm/test/CodeGen/AArch64/soft-float-abi.ll

Modified: 
    clang/include/clang/Basic/DiagnosticCommonKinds.td
    clang/include/clang/Basic/DiagnosticFrontendKinds.td
    clang/include/clang/Basic/DiagnosticIDs.h
    clang/lib/Basic/DiagnosticIDs.cpp
    clang/lib/Basic/Targets/AArch64.cpp
    clang/lib/Basic/Targets/AArch64.h
    clang/lib/CodeGen/CodeGenFunction.cpp
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/CodeGen/ModuleBuilder.cpp
    clang/lib/CodeGen/TargetInfo.h
    clang/lib/CodeGen/Targets/AArch64.cpp
    clang/test/CodeGen/attr-target-clones-aarch64.c
    clang/test/CodeGen/target-avx-abi-diag.c
    clang/test/Preprocessor/aarch64-target-features.c
    clang/test/Sema/arm-vector-types-support.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 08bb1d81ba29f1..43e132e5665850 100644

--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -356,6 +356,8 @@ def warn_target_unrecognized_env : Warning<
 def warn_knl_knm_isa_support_removed : Warning<
   "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.">,
   InGroup<DiagGroup<"knl-knm-isa-support-removed">>;
+def err_target_unsupported_abi_with_fpu : Error<
+  "'%0' ABI is not supported with FPU">;
 
 // Source manager
 def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;

diff  --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
index 794a0a82be6d74..ba23cf84c5e343 100644
--- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td
+++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td
@@ -281,6 +281,8 @@ def err_builtin_needs_feature : Error<"%0 needs target feature %1">;
 def err_function_needs_feature : Error<
   "always_inline function %1 requires target feature '%2', but would "
   "be inlined into function %0 that is compiled without support for '%2'">;
+
+let CategoryName = "Codegen ABI Check" in {
 def err_function_always_inline_attribute_mismatch : Error<
   "always_inline function %1 and its caller %0 have mismatching %2 attributes">;
 def err_function_always_inline_new_za : Error<
@@ -292,6 +294,10 @@ def warn_avx_calling_convention
       InGroup<DiagGroup<"psabi">>;
 def err_avx_calling_convention : Error<warn_avx_calling_convention.Summary>;
 
+def err_target_unsupported_type_for_abi
+    : Error<"%0 requires %1 type support, but ABI '%2' does not support it">;
+}
+
 def err_alias_to_undefined : Error<
   "%select{alias|ifunc}0 must point to a defined "
   "%select{variable or |}1function">;

diff  --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h
index 95b502b1e97ab1..5ff782c7f8c7e8 100644
--- a/clang/include/clang/Basic/DiagnosticIDs.h
+++ b/clang/include/clang/Basic/DiagnosticIDs.h
@@ -278,6 +278,9 @@ class DiagnosticIDs : public RefCountedBase<DiagnosticIDs> {
   /// category.
   static bool isARCDiagnostic(unsigned DiagID);
 
+  /// Return true if a given diagnostic is a codegen-time ABI check.
+  static bool isCodegenABICheckDiagnostic(unsigned DiagID);
+
   /// Enumeration describing how the emission of a diagnostic should
   /// be treated when it occurs during C++ template argument deduction.
   enum SFINAEResponse {

diff  --git a/clang/lib/Basic/DiagnosticIDs.cpp b/clang/lib/Basic/DiagnosticIDs.cpp
index 4138b4ee4b0e5d..cd42573968b212 100644
--- a/clang/lib/Basic/DiagnosticIDs.cpp
+++ b/clang/lib/Basic/DiagnosticIDs.cpp
@@ -861,6 +861,9 @@ bool DiagnosticIDs::isUnrecoverable(unsigned DiagID) const {
   if (isARCDiagnostic(DiagID))
     return false;
 
+  if (isCodegenABICheckDiagnostic(DiagID))
+    return false;
+
   return true;
 }
 
@@ -868,3 +871,8 @@ bool DiagnosticIDs::isARCDiagnostic(unsigned DiagID) {
   unsigned cat = getCategoryNumberForDiag(DiagID);
   return DiagnosticIDs::getCategoryNameFromID(cat).starts_with("ARC ");
 }
+
+bool DiagnosticIDs::isCodegenABICheckDiagnostic(unsigned DiagID) {
+  unsigned cat = getCategoryNumberForDiag(DiagID);
+  return DiagnosticIDs::getCategoryNameFromID(cat) == "Codegen ABI Check";
+}

diff  --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index fa8b5a869594b3..1c3199bd76eed6 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
@@ -200,13 +201,23 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
 StringRef AArch64TargetInfo::getABI() const { return ABI; }
 
 bool AArch64TargetInfo::setABI(const std::string &Name) {
-  if (Name != "aapcs" && Name != "darwinpcs")
+  if (Name != "aapcs" && Name != "aapcs-soft" && Name != "darwinpcs")
     return false;
 
   ABI = Name;
   return true;
 }
 
+bool AArch64TargetInfo::validateTarget(DiagnosticsEngine &Diags) const {
+  if (hasFeature("fp") && ABI == "aapcs-soft") {
+    // aapcs-soft is not allowed for targets with an FPU, to avoid there being
+    // two incomatible ABIs.
+    Diags.Report(diag::err_target_unsupported_abi_with_fpu) << ABI;
+    return false;
+  }
+  return true;
+}
+
 bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef,
                                                  BranchProtectionInfo &BPI,
                                                  StringRef &Err) const {
@@ -681,7 +692,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Cases("aarch64", "arm64", "arm", true)
       .Case("fmv", HasFMV)
-      .Cases("neon", "fp", "simd", FPU & NeonMode)
+      .Case("fp", FPU & FPUMode)
+      .Cases("neon", "simd", FPU & NeonMode)
       .Case("jscvt", HasJSCVT)
       .Case("fcma", HasFCMA)
       .Case("rng", HasRandGen)

diff  --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 2dd6b2181e87df..542894c66412dc 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -201,6 +201,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool hasInt128Type() const override;
 
   bool hasBitIntType() const override { return true; }
+
+  bool validateTarget(DiagnosticsEngine &Diags) const override;
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {

diff  --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 4a3ff49b0007a3..98bea8209cf3fe 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1361,6 +1361,8 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn,
   FunctionArgList Args;
   QualType ResTy = BuildFunctionArgList(GD, Args);
 
+  CGM.getTargetCodeGenInfo().checkFunctionABI(CGM, FD);
+
   if (FD->isInlineBuiltinDeclaration()) {
     // When generating code for a builtin with an inline declaration, use a
     // mangled name to hold the actual body, while keeping an external

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 8ceecff28cbc63..bb26bfcddaeb78 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -145,6 +145,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
       Kind = AArch64ABIKind::DarwinPCS;
     else if (Triple.isOSWindows())
       return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
+    else if (Target.getABI() == "aapcs-soft")
+      Kind = AArch64ABIKind::AAPCSSoft;
 
     return createAArch64TargetCodeGenInfo(CGM, Kind);
   }

diff  --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp
index 3594f4c66e6774..df85295cfb2e29 100644
--- a/clang/lib/CodeGen/ModuleBuilder.cpp
+++ b/clang/lib/CodeGen/ModuleBuilder.cpp
@@ -180,7 +180,7 @@ namespace {
 
     bool HandleTopLevelDecl(DeclGroupRef DG) override {
       // FIXME: Why not return false and abort parsing?
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return true;
 
       HandlingTopLevelDeclRAII HandlingDecl(*this);
@@ -206,7 +206,7 @@ namespace {
     }
 
     void HandleInlineFunctionDefinition(FunctionDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       assert(D->doesThisDeclarationHaveABody());
@@ -233,7 +233,7 @@ namespace {
     /// client hack on the type, which can occur at any point in the file
     /// (because these can be defined in declspecs).
     void HandleTagDeclDefinition(TagDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       // Don't allow re-entrant calls to CodeGen triggered by PCH
@@ -269,7 +269,7 @@ namespace {
     }
 
     void HandleTagDeclRequiredDefinition(const TagDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       // Don't allow re-entrant calls to CodeGen triggered by PCH
@@ -283,7 +283,7 @@ namespace {
 
     void HandleTranslationUnit(ASTContext &Ctx) override {
       // Release the Builder when there is no error.
-      if (!Diags.hasErrorOccurred() && Builder)
+      if (!Diags.hasUnrecoverableErrorOccurred() && Builder)
         Builder->Release();
 
       // If there are errors before or when releasing the Builder, reset
@@ -297,14 +297,14 @@ namespace {
     }
 
     void AssignInheritanceModel(CXXRecordDecl *RD) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       Builder->RefreshTypeCacheForClass(RD);
     }
 
     void CompleteTentativeDefinition(VarDecl *D) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       Builder->EmitTentativeDefinition(D);
@@ -315,7 +315,7 @@ namespace {
     }
 
     void HandleVTable(CXXRecordDecl *RD) override {
-      if (Diags.hasErrorOccurred())
+      if (Diags.hasUnrecoverableErrorOccurred())
         return;
 
       Builder->EmitVTable(RD);

diff  --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 7682f197041c74..6893b50a3cfe90 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -84,6 +84,11 @@ class TargetCodeGenInfo {
   /// Provides a convenient hook to handle extra target-specific globals.
   virtual void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const {}
 
+  /// Any further codegen related checks that need to be done on a function
+  /// signature in a target specific manner.
+  virtual void checkFunctionABI(CodeGenModule &CGM,
+                                const FunctionDecl *Decl) const {}
+
   /// Any further codegen related checks that need to be done on a function call
   /// in a target specific manner.
   virtual void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
@@ -416,6 +421,7 @@ enum class AArch64ABIKind {
   AAPCS = 0,
   DarwinPCS,
   Win64,
+  AAPCSSoft,
 };
 
 std::unique_ptr<TargetCodeGenInfo>

diff  --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 85117366de0ee8..4c32f510101f04 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -27,6 +27,8 @@ class AArch64ABIInfo : public ABIInfo {
   AArch64ABIInfo(CodeGenTypes &CGT, AArch64ABIKind Kind)
       : ABIInfo(CGT), Kind(Kind) {}
 
+  bool isSoftFloat() const { return Kind == AArch64ABIKind::AAPCSSoft; }
+
 private:
   AArch64ABIKind getABIKind() const { return Kind; }
   bool isDarwinPCS() const { return Kind == AArch64ABIKind::DarwinPCS; }
@@ -55,8 +57,8 @@ class AArch64ABIInfo : public ABIInfo {
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                           CodeGenFunction &CGF) const;
 
-  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const;
+  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
+                         AArch64ABIKind Kind) const;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override {
@@ -67,7 +69,7 @@ class AArch64ABIInfo : public ABIInfo {
 
     return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
            : isDarwinPCS()               ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
-                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind);
   }
 
   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -163,6 +165,9 @@ class AArch64TargetCodeGenInfo : public TargetCodeGenInfo {
     return TargetCodeGenInfo::isScalarizableAsmOperand(CGF, Ty);
   }
 
+  void checkFunctionABI(CodeGenModule &CGM,
+                        const FunctionDecl *Decl) const override;
+
   void checkFunctionCallABI(CodeGenModule &CGM, SourceLocation CallLoc,
                             const FunctionDecl *Caller,
                             const FunctionDecl *Callee,
@@ -494,6 +499,11 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
 }
 
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // For the soft-float ABI variant, no types are considered to be homogeneous
+  // aggregates.
+  if (Kind == AArch64ABIKind::AAPCSSoft)
+    return false;
+
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
   // point type or a short-vector type. This is the same as the 32-bit ABI,
   // but with the 
diff erence that any floating-point type is allowed,
@@ -525,7 +535,8 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
 }
 
 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
+                                       CodeGenFunction &CGF,
+                                       AArch64ABIKind Kind) const {
   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
                                        CGF.CurFnInfo->getCallingConvention());
   // Empty records are ignored for parameter passing purposes.
@@ -550,7 +561,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+  bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
+               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
@@ -841,6 +853,34 @@ static bool isStreamingCompatible(const FunctionDecl *F) {
   return false;
 }
 
+void AArch64TargetCodeGenInfo::checkFunctionABI(
+    CodeGenModule &CGM, const FunctionDecl *FuncDecl) const {
+  const AArch64ABIInfo &ABIInfo = getABIInfo<AArch64ABIInfo>();
+  const TargetInfo &TI = ABIInfo.getContext().getTargetInfo();
+
+  // If we are using a hard-float ABI, but do not have floating point
+  // registers, then report an error for any function arguments or returns
+  // which would be passed in floating-pint registers.
+  auto CheckType = [&CGM, &TI, &ABIInfo](const QualType &Ty,
+                                         const NamedDecl *D) {
+    const Type *HABase = nullptr;
+    uint64_t HAMembers = 0;
+    if (Ty->isFloatingType() || Ty->isVectorType() ||
+        ABIInfo.isHomogeneousAggregate(Ty, HABase, HAMembers)) {
+      CGM.getDiags().Report(D->getLocation(),
+                            diag::err_target_unsupported_type_for_abi)
+          << D->getDeclName() << Ty << TI.getABI();
+    }
+  };
+
+  if (!TI.hasFeature("fp") && !ABIInfo.isSoftFloat()) {
+    CheckType(FuncDecl->getReturnType(), FuncDecl);
+    for (ParmVarDecl *PVD : FuncDecl->parameters()) {
+      CheckType(PVD->getType(), PVD);
+    }
+  }
+}
+
 void AArch64TargetCodeGenInfo::checkFunctionCallABI(
     CodeGenModule &CGM, SourceLocation CallLoc, const FunctionDecl *Caller,
     const FunctionDecl *Callee, const CallArgList &Args) const {

diff  --git a/clang/test/CodeGen/aarch64-soft-float-abi-errors.c b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
new file mode 100644
index 00000000000000..3e5ab9e92a1d8c
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi-errors.c
@@ -0,0 +1,99 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +fp-armv8 -S -target-abi aapcs      -verify=fp-hard %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -target-abi aapcs-soft -verify=nofp-soft %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -target-abi aapcs      -verify=nofp-hard %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -target-abi aapcs -O1  -verify=nofp-hard,nofp-hard-opt -emit-llvm %s
+// No run line needed for soft-float ABI with an FPU because that is rejected by the driver
+
+// With the hard-float ABI and a target with an FPU, FP arguments are passed in
+// FP registers, no diagnostics needed.
+// fp-hard-no-diagnostics
+
+// With the soft-float ABI, FP arguments are passed in integer registers, no
+// diagnostics needed.
+// nofp-soft-no-diagnostics
+
+// With the hard-float ABI but no FPU, FP arguments cannot be passed in an
+// ABI-compatible way, so we report errors for these cases:
+
+struct HFA {
+  float x, y;
+};
+
+struct non_HFA {
+  float x;
+  int y;
+};
+
+// Floating-point arguments are returns are rejected
+void test_fp16_arg(__fp16 a) {}
+// nofp-hard-error at -1 {{'a' requires '__fp16' type support, but ABI 'aapcs' does not support it}}
+__fp16 test_fp16_ret(void) { return 3.141; }
+// nofp-hard-error at -1 {{'test_fp16_ret' requires '__fp16' type support, but ABI 'aapcs' does not support it}}
+void test_float_arg(float a) {}
+// nofp-hard-error at -1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
+float test_float_ret(void) { return 3.141f; }
+// nofp-hard-error at -1 {{'test_float_ret' requires 'float' type support, but ABI 'aapcs' does not support it}}
+void test_double_arg(double a) {}
+// nofp-hard-error at -1 {{'a' requires 'double' type support, but ABI 'aapcs' does not support it}}
+double test_double_ret(void) { return 3.141; }
+// nofp-hard-error at -1 {{'test_double_ret' requires 'double' type support, but ABI 'aapcs' does not support it}}
+void test_long_double_arg(long double a) {}
+// nofp-hard-error at -1 {{'a' requires 'long double' type support, but ABI 'aapcs' does not support it}}
+long double test_long_double_ret(void) { return 3.141L; }
+// nofp-hard-error at -1 {{'test_long_double_ret' requires 'long double' type support, but ABI 'aapcs' does not support it}}
+
+// HFAs would be passed in floating-point registers, so are rejected.
+void test_hfa_arg(struct HFA a) {}
+// nofp-hard-error at -1 {{'a' requires 'struct HFA' type support, but ABI 'aapcs' does not support it}}
+struct HFA test_hfa_ret(void) { return (struct HFA){}; }
+// nofp-hard-error at -1 {{'test_hfa_ret' requires 'struct HFA' type support, but ABI 'aapcs' does not support it}}
+
+// Note: vector types cannot be created at all for targets without an FPU, so
+// it is not possible to create a function which passes/returns them when using
+// either the default or soft-float ABI. This is tested elsewhere.
+
+// This struct contains a floating-point type, but is not an HFA, so can be
+// passed/returned without affecting the ABI.
+struct non_HFA test_non_hfa_ret(void) { return (struct non_HFA){}; }
+void test_non_hfa_arg(struct non_HFA a) {}
+
+// This inline function does not get code-generated because there is no use of
+// it in this file, so we we don't emit an error for it, matching GCC's
+// behaviour.
+inline void test_float_arg_inline(float a) {}
+
+// This inline function is used, so we emit the error if we generate code for
+// it. The code isn't generated at -O0, so no error is emitted there.
+inline void test_float_arg_inline_used(float a) {}
+// nofp-hard-opt-error at -1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
+void use_inline() { test_float_arg_inline_used(1.0f); }
+
+// The always_inline attribute causes an inline function to always be
+// code-genned, even at -O0, so we always emit the error.
+__attribute((always_inline))
+inline void test_float_arg_always_inline_used(float a) {}
+// nofp-hard-error at -1 {{'a' requires 'float' type support, but ABI 'aapcs' does not support it}}
+void use_always_inline() { test_float_arg_always_inline_used(1.0f); }
+
+// Floating-point expressions, global variables and local variables do not
+// affect the ABI, so are allowed. GCC does reject some uses of floating point
+// types like this, but it does so after optimisation, which we can't
+// accurately match in clang.
+int test_expr_float(int a) { return a + 1.0f; }
+int test_expr_double(int a) { return a + 1.0; }
+
+float global_float = 2.0f * 3.5f;
+float global_double = 2.0 * 3.5;
+
+int test_var_float(int a) {
+  float f = a;
+  f *= 6.0;
+  return (int)f;
+}
+int test_var_double(int a) {
+  double d = a;
+  d *= 6.0;
+  return (int)d;
+}

diff  --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..143377e218a197
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -0,0 +1,58 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -target-abi aapcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
+
+// See also llvm/test/CodeGen/AArch64/soft-float-abi.ll, which checks the LLVM
+// backend parts of the soft-float ABI.
+
+// The va_list type does not change between the ABIs
+// CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+// Floats are passed in integer registers, this will be handled by the backend.
+// CHECK: define dso_local half @test0(half noundef %a)
+// CHECK: define dso_local bfloat @test1(bfloat noundef %a)
+// CHECK: define dso_local float @test2(float noundef %a)
+// CHECK: define dso_local double @test3(double noundef %a)
+// CHECK: define dso_local fp128 @test4(fp128 noundef %a)
+__fp16 test0(__fp16 a) { return a; }
+__bf16 test1(__bf16 a) { return a; }
+float test2(float a) { return a; }
+double test3(double a) { return a; }
+long double test4(long double a) { return a; }
+
+// No types are considered to be HFAs or HVAs by the soft-float PCS, so these
+// are converted to integer types.
+struct A {
+  float x;
+};
+// SOFT: define dso_local i32 @test10(i64 %a.coerce)
+// HARD: define dso_local %struct.A @test10([1 x float] alignstack(8) %a.coerce)
+struct A test10(struct A a) { return a; }
+
+struct B {
+  double x;
+  double y;
+};
+// SOFT: define dso_local [2 x i64] @test11([2 x i64] %a.coerce)
+// HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
+struct B test11(struct B a) { return a; }
+
+#include <stdarg.h>
+
+// The layout of the va_list struct is unchanged between the ABIs, but for
+// aapcs-soft, floating-point arguments will be retreived from the GPR save
+// area, as if they were an integer type of the same size.
+// CHECK-LABEL: define dso_local double @test20(i32 noundef %a, ...)
+// CHECK: %vl = alloca %struct.__va_list, align 8
+// SOFT: %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 3
+// SOFT: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 1
+// HARD: %vr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 4
+// HARD: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 2
+double test20(int a, ...) {
+  va_list vl;
+  va_start(vl, a);
+  return va_arg(vl, double);
+}
+
+// Vector types are only available for targets with the correct hardware, and
+// their calling-convention is left undefined by the soft-float ABI, so they
+// aren't tested here.

diff  --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 276a7b87b7a1b4..94095f9aa3e1f4 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -23,8 +23,6 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 
 
 
-
-
 //.
 // CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
 // CHECK: @ftc.ifunc = weak_odr alias i32 (), ptr @ftc
@@ -177,15 +175,7 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK:       resolver_return:
 // CHECK-NEXT:    ret ptr @ftc_dup2._McrcMdotprod
 // CHECK:       resolver_else:
-// CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr @__aarch64_cpu_features, align 8
-// CHECK-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], 256
-// CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 256
-// CHECK-NEXT:    [[TMP7:%.*]] = and i1 true, [[TMP6]]
-// CHECK-NEXT:    br i1 [[TMP7]], label [[RESOLVER_RETURN1:%.*]], label [[RESOLVER_ELSE2:%.*]]
-// CHECK:       resolver_return1:
 // CHECK-NEXT:    ret ptr @ftc_dup2._Mfp
-// CHECK:       resolver_else2:
-// CHECK-NEXT:    ret ptr @ftc_dup2.default
 //
 //
 // CHECK: Function Attrs: noinline nounwind optnone

diff  --git a/clang/test/CodeGen/target-avx-abi-diag.c b/clang/test/CodeGen/target-avx-abi-diag.c
index 84be9e252db5cc..dfbbc3213ca6b2 100644
--- a/clang/test/CodeGen/target-avx-abi-diag.c
+++ b/clang/test/CodeGen/target-avx-abi-diag.c
@@ -2,13 +2,9 @@
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx -verify=no512 -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -verify=both -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature +evex512 -verify=both -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=1 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=2 -DNOEVEX512 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DAVX512_ERR=3 -DNOEVEX512 -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx512f -target-feature -evex512 -verify=avx512-256 -DNOEVEX512 -o - -S
 // RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-512 -verify=both -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -DAVX512_ERR=1 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -DAVX512_ERR=2 -o - -S
-// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -DAVX512_ERR=3 -o - -S
+// RUN: %clang_cc1 %s -triple=x86_64-linux-gnu -target-feature +avx10.1-256 -verify=avx512-256 -o - -S
 // REQUIRES: x86-registered-target
 
 // both-no-diagnostics
@@ -25,7 +21,6 @@ void takesAvx512_no_target(avx512fType t);
 void variadic(int i, ...);
 __attribute__((target("avx512f"))) void variadic_err(int i, ...);
 
-#if !defined(AVX512_ERR) || AVX512_ERR == 1
 // If neither side has an attribute, warn.
 void call_warn(void) {
   avx256Type t1;
@@ -39,9 +34,7 @@ void call_warn(void) {
   // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   variadic(3, t2); // no512-warning {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 }
-#endif
 
-#if !defined(AVX512_ERR) || AVX512_ERR == 2
 // If only 1 side has an attribute, error.
 void call_errors(void) {
   avx256Type t1;
@@ -54,16 +47,21 @@ void call_errors(void) {
   // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
   variadic_err(3, t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
 }
-#if defined(__AVX10_1__)
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
-#endif
 
-#if !defined(AVX512_ERR) || AVX512_ERR == 3
+// Check that these errors are treated as non-fatal, so we can report them for
+// all functions, not just the first.
+void call_errors_2(void) {
+  avx256Type t1;
+  takesAvx256(t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
+  avx512fType t2;
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
+  takesAvx512(t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
+
+  variadic_err(1, t1); // no256-error {{AVX vector argument of type 'avx256Type' (vector of 16 'short' values) without 'avx' enabled changes the ABI}}
+  // avx512-256-error at +1 {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'evex512' enabled changes the ABI}}
+  variadic_err(3, t2); // no512-error {{AVX vector argument of type 'avx512fType' (vector of 32 'short' values) without 'avx512f' enabled changes the ABI}}
+}
+
 __attribute__((target("avx"))) void call_avx256_ok(void) {
   avx256Type t;
   takesAvx256(t);
@@ -93,5 +91,11 @@ __attribute__((target("avx512f"))) void call_avx512_ok2(void) {
 // avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
 // avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
 // avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
+// avx512-256-warning@*:* {{invalid feature combination: +avx512f +avx10.1-256; will be promoted to avx10.1-512}}
 #endif

diff  --git a/clang/test/Driver/aarch64-soft-float-abi.c b/clang/test/Driver/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..0486d94e66072e
--- /dev/null
+++ b/clang/test/Driver/aarch64-soft-float-abi.c
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target
+
+// Hard-float, valid
+// RUN: %clang --target=aarch64-none-elf                               -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf                   -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r                -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r    -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+fp -mabi=aapcs -c %s -o /dev/null
+
+// Soft-float, no FP
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp -mabi=aapcs-soft -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -mgeneral-regs-only -mabi=aapcs-soft -c %s -o /dev/null
+
+// Soft-float, FP hardware: Rejected, to avoid having two incompatible ABIs for common targets.
+// RUN: not %clang --target=aarch64-none-elf                        -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+// RUN: not %clang --target=aarch64-none-elf -march=armv8-r+fp      -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+// RUN: not %clang --target=aarch64-none-elf -march=armv8-r+nofp+fp -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+
+// No FP, hard-float. This is accepted by the driver, but functions which
+// require arguments or returns to be passed in FP registers will be rejected
+// (tested elsewhere).
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp             -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -mgeneral-regs-only -mabi=aapcs -c %s -o /dev/null
+
+// INVALID-SOFT: error: 'aapcs-soft' ABI is not supported with FPU

diff  --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 6ec4dcd60cf601..9f8a8bdeeb9cb0 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -342,15 +342,15 @@
 
 // RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-1 %s
 // RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto+fp+simd+crc+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-1 %s
-// RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
-// RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto+nofp+nosimd+nocrc+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
+// RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto -mabi=aapcs-soft -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
+// RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto+nofp+nosimd+nocrc+nocrypto -mabi=aapcs-soft -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
 // RUN: %clang -target aarch64 -march=armv8-a+nosimd -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-3 %s
 // CHECK-MARCH-1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+fp-armv8" "-target-feature" "+sha2" "-target-feature" "+neon"
 // CHECK-MARCH-2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-fp-armv8"{{.*}} "-target-feature" "-neon"
 // CHECK-MARCH-3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-neon"
 
 // While we're checking +nofp, also make sure it stops defining __ARM_FP
-// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -mabi=aapcs-soft -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s
 // CHECK-NOFP-NOT: #define __ARM_FP{{ }}
 
 // Check +sm4:

diff  --git a/clang/test/Sema/arm-vector-types-support.c b/clang/test/Sema/arm-vector-types-support.c
index 83a83ddfe78017..ed5f5ba175a94a 100644
--- a/clang/test/Sema/arm-vector-types-support.c
+++ b/clang/test/Sema/arm-vector-types-support.c
@@ -1,4 +1,6 @@
 // RUN: %clang_cc1 %s -triple armv7 -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple aarch64 -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -fsyntax-only -verify
 
 typedef __attribute__((neon_vector_type(2))) int int32x2_t; // expected-error{{'neon_vector_type' attribute is not supported on targets missing 'neon', 'mve', 'sve' or 'sme'; specify an appropriate -march= or -mcpu=}}
 typedef __attribute__((neon_polyvector_type(16))) short poly8x16_t; // expected-error{{'neon_polyvector_type' attribute is not supported on targets missing 'neon' or 'mve'; specify an appropriate -march= or -mcpu=}}

diff  --git a/llvm/test/CodeGen/AArch64/soft-float-abi.ll b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
new file mode 100644
index 00000000000000..291c3875c2488d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple aarch64-none-eabi < %s -mattr=-fp-armv8 | FileCheck %s
+
+; See also clang/test/CodeGen/aarch64-soft-float-abi.c, which tests the clang
+; parts of the soft-float ABI.
+
+; FP types up to 64-bit are passed in a general purpose register.
+define half @test0(half %a, half %b)  {
+; CHECK-LABEL: test0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret half %b
+}
+
+define bfloat @test1(i32 %a, bfloat %b) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret bfloat %b
+}
+
+define float @test2(i64 %a, float %b) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret float %b
+}
+
+define double @test3(half %a, double %b) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+entry:
+  ret double %b
+}
+
+; fp128 is passed in a pair of GPRs.
+define fp128 @test4(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; fp128 is passed in an aligned pair of GPRs, leaving one register unused is
+; necessary.
+define fp128 @test5(float %a, fp128 %b) {
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; If the alignment of an fp128 leaves a register unused, it remains unused even
+; if a later argument could fit in it.
+define i64 @test6(i64 %a, fp128 %b, i64 %c) {
+; CHECK-LABEL: test6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x4
+; CHECK-NEXT:    ret
+entry:
+  ret i64 %c
+}
+
+; HFAs are all bit-casted to integer types in the frontend when using the
+; soft-float ABI, so they get passed in the same way as non-homeogeneous
+; aggregates. The IR is identical to the equivalent integer types, so nothing
+; to test here.
+
+; The PCS for vector and HVA types is not defined by the soft-float ABI because
+; these types are only defined by the ACLE when vector hardware is available,
+; so nothing to test here.
+
+; The front-end generates IR for va_arg which always reads from the integer
+; register save area, and never the floating-point register save area. The
+; layout of the va_list type remains the same, the floating-point related
+; fields are unused. The only change needed in the backend is  in va_start, to
+; not attempt to save the floating-point registers or set the FP fields in the
+; va_list.
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+declare void @llvm.va_start(ptr)
+define double @test20(i32 %a, ...) {
+; CHECK-LABEL: test20:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    mov w8, #-56 // =0xffffffc8
+; CHECK-NEXT:    add x10, sp, #8
+; CHECK-NEXT:    add x9, sp, #96
+; CHECK-NEXT:    str x8, [sp, #88]
+; CHECK-NEXT:    add x10, x10, #56
+; CHECK-NEXT:    ldrsw x8, [sp, #88]
+; CHECK-NEXT:    stp x1, x2, [sp, #8]
+; CHECK-NEXT:    stp x3, x4, [sp, #24]
+; CHECK-NEXT:    stp x5, x6, [sp, #40]
+; CHECK-NEXT:    stp x7, x9, [sp, #56]
+; CHECK-NEXT:    str x10, [sp, #72]
+; CHECK-NEXT:    tbz w8, #31, .LBB7_3
+; CHECK-NEXT:  // %bb.1: // %vaarg.maybe_reg
+; CHECK-NEXT:    add w9, w8, #8
+; CHECK-NEXT:    cmn w8, #8
+; CHECK-NEXT:    str w9, [sp, #88]
+; CHECK-NEXT:    b.gt .LBB7_3
+; CHECK-NEXT:  // %bb.2: // %vaarg.in_reg
+; CHECK-NEXT:    ldr x9, [sp, #72]
+; CHECK-NEXT:    add x8, x9, x8
+; CHECK-NEXT:    b .LBB7_4
+; CHECK-NEXT:  .LBB7_3: // %vaarg.on_stack
+; CHECK-NEXT:    ldr x8, [sp, #64]
+; CHECK-NEXT:    add x9, x8, #8
+; CHECK-NEXT:    str x9, [sp, #64]
+; CHECK-NEXT:  .LBB7_4: // %vaarg.end
+; CHECK-NEXT:    ldr x0, [x8]
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
+entry:
+  %vl = alloca %struct.__va_list, align 8
+  call void @llvm.va_start(ptr nonnull %vl)
+  %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 3
+  %gr_offs = load i32, ptr %gr_offs_p, align 8
+  %0 = icmp sgt i32 %gr_offs, -1
+  br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %entry
+  %new_reg_offs = add nsw i32 %gr_offs, 8
+  store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+  %inreg = icmp slt i32 %gr_offs, -7
+  br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 1
+  %reg_top = load ptr, ptr %reg_top_p, align 8
+  %1 = sext i32 %gr_offs to i64
+  %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
+  %stack = load ptr, ptr %vl, align 8
+  %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+  store ptr %new_stack, ptr %vl, align 8
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+  %3 = load double, ptr %vaargs.addr, align 8
+  ret double %3
+}
+