[clang] 9cc98e3 - [AArch64] Add soft-float ABI (#74460)

Thu Feb 15 04:39:20 PST 2024

Author: ostannard
Date: 2024-02-15T12:39:16Z
New Revision: 9cc98e336980f00cbafcbed8841344e6ac472bdc

URL: https://github.com/llvm/llvm-project/commit/9cc98e336980f00cbafcbed8841344e6ac472bdc
DIFF: https://github.com/llvm/llvm-project/commit/9cc98e336980f00cbafcbed8841344e6ac472bdc.diff

LOG: [AArch64] Add soft-float ABI (#74460)

This adds support for the AArch64 soft-float ABI. The specification for
this ABI was added by https://github.com/ARM-software/abi-aa/pull/232.

Because all existing AArch64 hardware has floating-point hardware, we
expect this to be a niche option, only used for embedded systems on
R-profile systems. We are going to document that SysV-like systems
should only ever use the base (hard-float) PCS variant:
https://github.com/ARM-software/abi-aa/pull/233. For that reason, I've
not added an option to select the ABI independently of the FPU hardware,
instead the new ABI is enabled iff the target architecture does not have
an FPU.

For testing, I have run this through an ABI fuzzer, but since this is
the first implementation it can only test for internal consistency
(callers and callees agree on the PCS), not for conformance to the ABI
spec.

Added: 
    clang/test/CodeGen/aarch64-soft-float-abi.c
    clang/test/Driver/aarch64-soft-float-abi.c
    clang/test/Sema/aarch64-soft-float-abi.c
    llvm/test/CodeGen/AArch64/soft-float-abi.ll

Modified: 
    clang/include/clang/Basic/DiagnosticCommonKinds.td
    clang/include/clang/Basic/DiagnosticSemaKinds.td
    clang/include/clang/Basic/TargetInfo.h
    clang/lib/Basic/TargetInfo.cpp
    clang/lib/Basic/Targets.cpp
    clang/lib/Basic/Targets/AArch64.cpp
    clang/lib/Basic/Targets/AArch64.h
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/lib/CodeGen/TargetInfo.h
    clang/lib/CodeGen/Targets/AArch64.cpp
    clang/lib/Sema/Sema.cpp
    clang/test/CodeGen/attr-target-clones-aarch64.c
    clang/test/Preprocessor/aarch64-target-features.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 08bb1d81ba29f1..43e132e5665850 100644

--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -356,6 +356,8 @@ def warn_target_unrecognized_env : Warning<
 def warn_knl_knm_isa_support_removed : Warning<
   "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.">,
   InGroup<DiagGroup<"knl-knm-isa-support-removed">>;
+def err_target_unsupported_abi_with_fpu : Error<
+  "'%0' ABI is not supported with FPU">;
 
 // Source manager
 def err_cannot_open_file : Error<"cannot open file '%0': %1">, DefaultFatal;

diff  --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 40b47c3ca92e7d..935d94e38cc377 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -11314,6 +11314,8 @@ def err_omp_wrong_dependency_iterator_type : Error<
 def err_target_unsupported_type
     : Error<"%0 requires %select{|%2 bit size}1 %3 %select{|return }4type support,"
             " but target '%5' does not support it">;
+def err_target_unsupported_type_for_abi
+    : Error<"%0 requires %1 type support, but ABI '%2' does not support it">;
 def err_omp_lambda_capture_in_declare_target_not_to : Error<
   "variable captured in declare target region must appear in a to clause">;
 def err_omp_device_type_mismatch : Error<

diff  --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 48e9cec482755c..2f80234c0dc7e3 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -231,6 +231,7 @@ class TargetInfo : public TransferrableTargetInfo,
   bool HasIbm128;
   bool HasLongDouble;
   bool HasFPReturn;
+  bool HasFPTypes;
   bool HasStrictFP;
 
   unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth;
@@ -689,6 +690,9 @@ class TargetInfo : public TransferrableTargetInfo,
   /// on this target.
   virtual bool hasFPReturn() const { return HasFPReturn; }
 
+  /// Determine whether floating point types are supported for this target.
+  virtual bool hasFPTypes() const { return HasFPTypes; }
+
   /// Determine whether constrained floating point is supported on this target.
   virtual bool hasStrictFP() const { return HasStrictFP; }
 
@@ -1331,6 +1335,10 @@ class TargetInfo : public TransferrableTargetInfo,
     return false;
   }
 
+  /// Make changes to the supported types which depend on both the target
+  /// features and ABI.
+  virtual void setSupportedArgTypes() {}
+
   /// Use the specified unit for FP math.
   ///
   /// \return False on error (invalid unit name).

diff  --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp
index 96b3ad9ba2f273..08fb0f2bb1bad6 100644
--- a/clang/lib/Basic/TargetInfo.cpp
+++ b/clang/lib/Basic/TargetInfo.cpp
@@ -67,6 +67,7 @@ TargetInfo::TargetInfo(const llvm::Triple &T) : Triple(T) {
   HasFullBFloat16 = false;
   HasLongDouble = true;
   HasFPReturn = true;
+  HasFPTypes = true;
   HasStrictFP = false;
   PointerWidth = PointerAlign = 32;
   BoolWidth = BoolAlign = 8;

diff  --git a/clang/lib/Basic/Targets.cpp b/clang/lib/Basic/Targets.cpp
index e3283510c6aac7..577564902453ae 100644
--- a/clang/lib/Basic/Targets.cpp
+++ b/clang/lib/Basic/Targets.cpp
@@ -830,6 +830,7 @@ TargetInfo::CreateTargetInfo(DiagnosticsEngine &Diags,
   Target->setSupportedOpenCLOpts();
   Target->setCommandLineOpenCLOpts();
   Target->setMaxAtomicWidth();
+  Target->setSupportedArgTypes();
 
   if (!Opts->DarwinTargetVariantTriple.empty())
     Target->DarwinTargetVariantTriple =

diff  --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index dd0218e6ebed81..c49461bd20eeec 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AArch64.h"
+#include "clang/Basic/Diagnostic.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
@@ -199,13 +200,32 @@ AArch64TargetInfo::AArch64TargetInfo(const llvm::Triple &Triple,
 StringRef AArch64TargetInfo::getABI() const { return ABI; }
 
 bool AArch64TargetInfo::setABI(const std::string &Name) {
-  if (Name != "aapcs" && Name != "darwinpcs")
+  if (Name != "aapcs" && Name != "aapcs-soft" && Name != "darwinpcs")
     return false;
 
   ABI = Name;
   return true;
 }
 
+void AArch64TargetInfo::setSupportedArgTypes() {
+  if (!(FPU & FPUMode) && ABI != "aapcs-soft") {
+    // When a hard-float ABI is used on a target without an FPU, all
+    // floating-point argument and return types are rejected because they must
+    // be passed in FP registers.
+    HasFPTypes = false;
+  }
+}
+
+bool AArch64TargetInfo::validateTarget(DiagnosticsEngine &Diags) const {
+  if (hasFeature("fp") && ABI == "aapcs-soft") {
+    // aapcs-soft is not allowed for targets with an FPU, to avoid there being
+    // two incomatible ABIs.
+    Diags.Report(diag::err_target_unsupported_abi_with_fpu) << ABI;
+    return false;
+  }
+  return true;
+}
+
 bool AArch64TargetInfo::validateBranchProtection(StringRef Spec, StringRef,
                                                  BranchProtectionInfo &BPI,
                                                  StringRef &Err) const {
@@ -686,7 +706,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Cases("aarch64", "arm64", "arm", true)
       .Case("fmv", HasFMV)
-      .Cases("neon", "fp", "simd", FPU & NeonMode)
+      .Case("fp", FPU & FPUMode)
+      .Cases("neon", "simd", FPU & NeonMode)
       .Case("jscvt", HasJSCVT)
       .Case("fcma", HasFCMA)
       .Case("rng", HasRandGen)

diff  --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 26ee7fa1978256..f4ec3543f4082c 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -96,6 +96,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
 
   StringRef getABI() const override;
   bool setABI(const std::string &Name) override;
+  void setSupportedArgTypes() override;
 
   bool validateBranchProtection(StringRef Spec, StringRef Arch,
                                 BranchProtectionInfo &BPI,
@@ -199,6 +200,8 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
   bool hasInt128Type() const override;
 
   bool hasBitIntType() const override { return true; }
+
+  bool validateTarget(DiagnosticsEngine &Diags) const override;
 };
 
 class LLVM_LIBRARY_VISIBILITY AArch64leTargetInfo : public AArch64TargetInfo {

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c984260b082cd1..64836f8d174710 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -145,6 +145,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
       Kind = AArch64ABIKind::DarwinPCS;
     else if (Triple.isOSWindows())
       return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
+    else if (Target.getABI() == "aapcs-soft")
+      Kind = AArch64ABIKind::AAPCSSoft;
 
     return createAArch64TargetCodeGenInfo(CGM, Kind);
   }

diff  --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 7682f197041c74..0571e828bb1d4a 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -416,6 +416,7 @@ enum class AArch64ABIKind {
   AAPCS = 0,
   DarwinPCS,
   Win64,
+  AAPCSSoft,
 };
 
 std::unique_ptr<TargetCodeGenInfo>

diff  --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index ee7f95084d2e0b..81c1b29187ed96 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -53,8 +53,8 @@ class AArch64ABIInfo : public ABIInfo {
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                           CodeGenFunction &CGF) const;
 
-  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const;
+  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
+                         AArch64ABIKind Kind) const;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override {
@@ -65,7 +65,7 @@ class AArch64ABIInfo : public ABIInfo {
 
     return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
            : isDarwinPCS()               ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
-                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+                           : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind);
   }
 
   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -482,6 +482,11 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
 }
 
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // For the soft-float ABI variant, no types are considered to be homogeneous
+  // aggregates.
+  if (Kind == AArch64ABIKind::AAPCSSoft)
+    return false;
+
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
   // point type or a short-vector type. This is the same as the 32-bit ABI,
   // but with the 
diff erence that any floating-point type is allowed,
@@ -513,7 +518,8 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
 }
 
 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
+                                       CodeGenFunction &CGF,
+                                       AArch64ABIKind Kind) const {
   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
                                        CGF.CurFnInfo->getCallingConvention());
   // Empty records are ignored for parameter passing purposes.
@@ -538,7 +544,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+  bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
+               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:

diff  --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp
index cfb653e665ea03..72e837e13afed1 100644
--- a/clang/lib/Sema/Sema.cpp
+++ b/clang/lib/Sema/Sema.cpp
@@ -1944,6 +1944,21 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID,
   return DB;
 }
 
+static bool typeIsOrContainsFloat(const Type &Ty) {
+  if (Ty.isFloatingType())
+    return true;
+
+  if (const RecordDecl *Decl = Ty.getAsRecordDecl()) {
+    for (const FieldDecl *FD : Decl->fields()) {
+      const Type &FieldType = *FD->getType();
+      if (typeIsOrContainsFloat(FieldType))
+        return true;
+    }
+  }
+
+  return false;
+}
+
 void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
   if (isUnevaluatedContext() || Ty.isNull())
     return;
@@ -2088,6 +2103,25 @@ void Sema::checkTypeSupport(QualType Ty, SourceLocation Loc, ValueDecl *D) {
           !Builtin::evaluateRequiredTargetFeatures("sme", CallerFeatureMap))
         Diag(D->getLocation(), diag::err_sve_vector_in_non_sve_target) << Ty;
     }
+
+    // Don't allow any floating-point types (including structs containing
+    // floats) for ABIs which do not support them.
+    if (!TI.hasFPTypes() && typeIsOrContainsFloat(*UnqualTy)) {
+      PartialDiagnostic PD = PDiag(diag::err_target_unsupported_type_for_abi);
+
+      if (D)
+        PD << D;
+      else
+        PD << "expression";
+
+      if (Diag(Loc, PD, FD) << Ty << TI.getABI()) {
+        if (D)
+          D->setInvalidDecl();
+      }
+
+      if (D)
+        targetDiag(D->getLocation(), diag::note_defined_here, FD) << D;
+    }
   };
 
   CheckType(Ty);

diff  --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..54564310752029
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -target-abi aapcs -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
+
+// See also llvm/test/CodeGen/AArch64/soft-float-abi.ll, which checks the LLVM
+// backend parts of the soft-float ABI.
+
+// The va_list type does not change between the ABIs
+// CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
+// Floats are passed in integer registers, this will be handled by the backend.
+// CHECK: define dso_local half @test0(half noundef %a)
+// CHECK: define dso_local bfloat @test1(bfloat noundef %a)
+// CHECK: define dso_local float @test2(float noundef %a)
+// CHECK: define dso_local double @test3(double noundef %a)
+// CHECK: define dso_local fp128 @test4(fp128 noundef %a)
+__fp16 test0(__fp16 a) { return a; }
+__bf16 test1(__bf16 a) { return a; }
+float test2(float a) { return a; }
+double test3(double a) { return a; }
+long double test4(long double a) { return a; }
+
+// No types are considered to be HFAs or HVAs by the soft-float PCS, so these
+// are converted to integer types.
+struct A {
+  float x;
+};
+// SOFT: define dso_local i32 @test10(i64 %a.coerce)
+// HARD: define dso_local %struct.A @test10([1 x float] alignstack(8) %a.coerce)
+struct A test10(struct A a) { return a; }
+
+struct B {
+  double x;
+  double y;
+};
+// SOFT: define dso_local [2 x i64] @test11([2 x i64] %a.coerce)
+// HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
+struct B test11(struct B a) { return a; }
+
+#include <stdarg.h>
+
+// For variadic arguments, va_arg will always retreive 
+// CHECK-LABEL: define dso_local double @test20(i32 noundef %a, ...)
+// CHECK: %vl = alloca %struct.__va_list, align 8
+// SOFT: %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 3
+// SOFT: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 1
+// HARD: %vr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 4
+// HARD: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 2
+double test20(int a, ...) {
+  va_list vl;
+  va_start(vl, a);
+  return va_arg(vl, double);
+}
+
+// Vector types are only available for targets with the correct hardware, and
+// their calling-convention is left undefined by the soft-float ABI, so they
+// aren't tested here.

diff  --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 5ea3f4a9b0b112..47eceadd4376f6 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
 
 int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; }
 int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) ftc_def(void) { return 1; }
@@ -414,23 +414,23 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NOFMV-NEXT:    ret i32 [[ADD5]]
 //
 //.
-// CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon" }
-// CHECK: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" }
-// CHECK: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sha2" }
-// CHECK: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+mte,+neon,+sha2" }
-// CHECK: attributes #[[ATTR5:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
-// CHECK: attributes #[[ATTR6:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+fp-armv8,+neon" }
-// CHECK: attributes #[[ATTR7:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rand" }
-// CHECK: attributes #[[ATTR8:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc" }
-// CHECK: attributes #[[ATTR9:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" }
-// CHECK: attributes #[[ATTR10:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" }
-// CHECK: attributes #[[ATTR11:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm" }
-// CHECK: attributes #[[ATTR12:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" }
-// CHECK: attributes #[[ATTR13:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" }
+// CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,-fp-armv8" }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8" }
+// CHECK: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mte,+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR5:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR6:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR7:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+rand,-fp-armv8" }
+// CHECK: attributes #[[ATTR8:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc,-fp-armv8" }
+// CHECK: attributes #[[ATTR9:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt,-fp-armv8" }
+// CHECK: attributes #[[ATTR10:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR11:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm,-fp-armv8" }
+// CHECK: attributes #[[ATTR12:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,-fp-armv8" }
+// CHECK: attributes #[[ATTR13:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sb,+sve,-fp-armv8" }
 //.
-// CHECK-NOFMV: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
-// CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
+// CHECK-NOFMV: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv,-fp-armv8" }
+// CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv,-fp-armv8" }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}

diff  --git a/clang/test/Driver/aarch64-soft-float-abi.c b/clang/test/Driver/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..08dd5418961932
--- /dev/null
+++ b/clang/test/Driver/aarch64-soft-float-abi.c
@@ -0,0 +1,23 @@
+// Hard-float, valid
+// RUN: %clang --target=aarch64-none-elf                               -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf                   -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r                -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r    -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+fp -mabi=aapcs -c %s -o /dev/null
+
+// Soft-float, no FP
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp -mabi=aapcs-soft -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -mgeneral-regs-only -mabi=aapcs-soft -c %s -o /dev/null
+
+// Soft-float, FP hardware: Rejected, to avoid having two incompatible ABIs for common targets.
+// RUN: not %clang --target=aarch64-none-elf                        -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+// RUN: not %clang --target=aarch64-none-elf -march=armv8-r+fp      -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+// RUN: not %clang --target=aarch64-none-elf -march=armv8-r+nofp+fp -mabi=aapcs-soft -c %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID-SOFT
+
+// No FP, hard-float. This is accepted by the driver, but any use of
+// floating-point types will be rejected by Sema (tested elsewhere).
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp             -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -march=armv8-r+nofp -mabi=aapcs -c %s -o /dev/null
+// RUN: %clang --target=aarch64-none-elf -mgeneral-regs-only -mabi=aapcs -c %s -o /dev/null
+
+// INVALID-SOFT: error: 'aapcs-soft' ABI is not supported with FPU

diff  --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c
index 6ec4dcd60cf601..9f8a8bdeeb9cb0 100644
--- a/clang/test/Preprocessor/aarch64-target-features.c
+++ b/clang/test/Preprocessor/aarch64-target-features.c
@@ -342,15 +342,15 @@
 
 // RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-1 %s
 // RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto+fp+simd+crc+crypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-1 %s
-// RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
-// RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto+nofp+nosimd+nocrc+nocrypto -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
+// RUN: %clang -target aarch64 -march=armv8-a+nofp+nosimd+nocrc+nocrypto -mabi=aapcs-soft -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
+// RUN: %clang -target aarch64 -march=armv8-a+fp+simd+crc+crypto+nofp+nosimd+nocrc+nocrypto -mabi=aapcs-soft -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-2 %s
 // RUN: %clang -target aarch64 -march=armv8-a+nosimd -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-MARCH-3 %s
 // CHECK-MARCH-1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "+v8a" "-target-feature" "+aes" "-target-feature" "+crc" "-target-feature" "+crypto" "-target-feature" "+fp-armv8" "-target-feature" "+sha2" "-target-feature" "+neon"
 // CHECK-MARCH-2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-fp-armv8"{{.*}} "-target-feature" "-neon"
 // CHECK-MARCH-3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-feature" "-neon"
 
 // While we're checking +nofp, also make sure it stops defining __ARM_FP
-// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s
+// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-r+nofp -mabi=aapcs-soft -x c -E -dM %s -o - | FileCheck -check-prefix=CHECK-NOFP %s
 // CHECK-NOFP-NOT: #define __ARM_FP{{ }}
 
 // Check +sm4:

diff  --git a/clang/test/Sema/aarch64-soft-float-abi.c b/clang/test/Sema/aarch64-soft-float-abi.c
new file mode 100644
index 00000000000000..908f158ffe8719
--- /dev/null
+++ b/clang/test/Sema/aarch64-soft-float-abi.c
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +fp-armv8 -fsyntax-only -target-abi aapcs      -verify=fp-hard %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -fsyntax-only -target-abi aapcs-soft -verify=nofp-soft %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -fsyntax-only -target-abi aapcs      -verify=nofp-hard %s
+// No run line needed for soft-float ABI with an FPU because that is rejected by the driver
+
+// With the hard-float ABI and a target with an FPU, FP arguments are passed in
+// FP registers, no diagnostics needed.
+// fp-hard-no-diagnostics
+
+// With the soft-float ABI, FP arguments are passed in integer registers, no
+// diagnostics needed.
+// nofp-soft-no-diagnostics
+
+// With the hard-float ABI but no FPU, FP arguments cannot be passed in an
+// ABI-compatible way, so we report errors for these cases:
+
+struct HFA {
+  float x, y;
+};
+
+struct non_HFA {
+  float x;
+  int y;
+};
+
+float test_float_ret(void) { return 3.141f; } // #1
+// nofp-hard-error@#1 {{'test_float_ret' requires 'float' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-note@#1 {{'test_float_ret' defined here}}
+
+struct HFA test_hfa_ret(void) { return (struct HFA){}; } // #2
+// nofp-hard-error@#2 {{'test_hfa_ret' requires 'struct HFA' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-note@#2 {{'test_hfa_ret' defined here}}
+
+struct non_HFA test_non_hfa_ret(void) { return (struct non_HFA){}; } // #3
+// nofp-hard-error@#3 {{'test_non_hfa_ret' requires 'struct non_HFA' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-note@#3 {{'test_non_hfa_ret' defined here}}
+
+void test_float_arg(float a) {} // #4
+// nofp-hard-error@#4 {{'test_float_arg' requires 'float' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-note@#4 {{'test_float_arg' defined here}}
+
+void test_hfa_arg(struct HFA a) {} // #5
+// nofp-hard-error@#5 {{'test_hfa_arg' requires 'struct HFA' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-note@#5 {{'test_hfa_arg' defined here}}
+
+void test_non_hfa_arg(struct non_HFA a) {} // #6
+// nofp-hard-error@#6 {{'test_non_hfa_arg' requires 'struct non_HFA' type support, but ABI 'aapcs' does not support it}}
+// nofp-hard-note@#6 {{'test_non_hfa_arg' defined here}}
+
+int test_expr_float(int a) { return a + 1.0f; } // #7
+// nofp-hard-error@#7 {{expression requires 'float' type support, but ABI 'aapcs' does not support it}}
+
+int test_expr_double(int a) { return a + 1.0; } // #8
+// nofp-hard-error@#8 {{expression requires 'double' type support, but ABI 'aapcs' does not support it}}

diff  --git a/llvm/test/CodeGen/AArch64/soft-float-abi.ll b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
new file mode 100644
index 00000000000000..291c3875c2488d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple aarch64-none-eabi < %s -mattr=-fp-armv8 | FileCheck %s
+
+; See also clang/test/CodeGen/aarch64-soft-float-abi.c, which tests the clang
+; parts of the soft-float ABI.
+
+; FP types up to 64-bit are passed in a general purpose register.
+define half @test0(half %a, half %b)  {
+; CHECK-LABEL: test0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret half %b
+}
+
+define bfloat @test1(i32 %a, bfloat %b) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret bfloat %b
+}
+
+define float @test2(i64 %a, float %b) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret float %b
+}
+
+define double @test3(half %a, double %b) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+entry:
+  ret double %b
+}
+
+; fp128 is passed in a pair of GPRs.
+define fp128 @test4(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; fp128 is passed in an aligned pair of GPRs, leaving one register unused is
+; necessary.
+define fp128 @test5(float %a, fp128 %b) {
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; If the alignment of an fp128 leaves a register unused, it remains unused even
+; if a later argument could fit in it.
+define i64 @test6(i64 %a, fp128 %b, i64 %c) {
+; CHECK-LABEL: test6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x4
+; CHECK-NEXT:    ret
+entry:
+  ret i64 %c
+}
+
+; HFAs are all bit-casted to integer types in the frontend when using the
+; soft-float ABI, so they get passed in the same way as non-homeogeneous
+; aggregates. The IR is identical to the equivalent integer types, so nothing
+; to test here.
+
+; The PCS for vector and HVA types is not defined by the soft-float ABI because
+; these types are only defined by the ACLE when vector hardware is available,
+; so nothing to test here.
+
+; The front-end generates IR for va_arg which always reads from the integer
+; register save area, and never the floating-point register save area. The
+; layout of the va_list type remains the same, the floating-point related
+; fields are unused. The only change needed in the backend is  in va_start, to
+; not attempt to save the floating-point registers or set the FP fields in the
+; va_list.
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+declare void @llvm.va_start(ptr)
+define double @test20(i32 %a, ...) {
+; CHECK-LABEL: test20:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    mov w8, #-56 // =0xffffffc8
+; CHECK-NEXT:    add x10, sp, #8
+; CHECK-NEXT:    add x9, sp, #96
+; CHECK-NEXT:    str x8, [sp, #88]
+; CHECK-NEXT:    add x10, x10, #56
+; CHECK-NEXT:    ldrsw x8, [sp, #88]
+; CHECK-NEXT:    stp x1, x2, [sp, #8]
+; CHECK-NEXT:    stp x3, x4, [sp, #24]
+; CHECK-NEXT:    stp x5, x6, [sp, #40]
+; CHECK-NEXT:    stp x7, x9, [sp, #56]
+; CHECK-NEXT:    str x10, [sp, #72]
+; CHECK-NEXT:    tbz w8, #31, .LBB7_3
+; CHECK-NEXT:  // %bb.1: // %vaarg.maybe_reg
+; CHECK-NEXT:    add w9, w8, #8
+; CHECK-NEXT:    cmn w8, #8
+; CHECK-NEXT:    str w9, [sp, #88]
+; CHECK-NEXT:    b.gt .LBB7_3
+; CHECK-NEXT:  // %bb.2: // %vaarg.in_reg
+; CHECK-NEXT:    ldr x9, [sp, #72]
+; CHECK-NEXT:    add x8, x9, x8
+; CHECK-NEXT:    b .LBB7_4
+; CHECK-NEXT:  .LBB7_3: // %vaarg.on_stack
+; CHECK-NEXT:    ldr x8, [sp, #64]
+; CHECK-NEXT:    add x9, x8, #8
+; CHECK-NEXT:    str x9, [sp, #64]
+; CHECK-NEXT:  .LBB7_4: // %vaarg.end
+; CHECK-NEXT:    ldr x0, [x8]
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
+entry:
+  %vl = alloca %struct.__va_list, align 8
+  call void @llvm.va_start(ptr nonnull %vl)
+  %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 3
+  %gr_offs = load i32, ptr %gr_offs_p, align 8
+  %0 = icmp sgt i32 %gr_offs, -1
+  br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %entry
+  %new_reg_offs = add nsw i32 %gr_offs, 8
+  store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+  %inreg = icmp slt i32 %gr_offs, -7
+  br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 1
+  %reg_top = load ptr, ptr %reg_top_p, align 8
+  %1 = sext i32 %gr_offs to i64
+  %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
+  %stack = load ptr, ptr %vl, align 8
+  %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+  store ptr %new_stack, ptr %vl, align 8
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+  %3 = load double, ptr %vaargs.addr, align 8
+  ret double %3
+}
+