[llvm] [clang] [AArch64] Add soft-float ABI (PR #74460)

Wed Dec 6 01:41:37 PST 2023

https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/74460

>From 316854b6558811aaa03b9f96be1849e0426f8aac Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Fri, 1 Dec 2023 10:06:57 +0000
Subject: [PATCH 1/6] [AArch64] Split feature tests for FP and SIMD

AArch64TargetInfo defaults to having the FP feature enabled, but this
function was ignoring that and checking for SIMD instructions instead.

This won't affect most users, because the driver explicitly enables or
disables fp-armv8, which gets handled by
AArch64TargetInfo::handleTargetFeatures to turn FP and SIMD on or off.
However, it will make testing future patches easier, and allow testing
for the presense of FP registers/instructions in CC1 tests.

Change-Id: I2d2b3569dca5fa1dc40c5c6d1dabf7741b8c480e
---
 clang/lib/Basic/Targets/AArch64.cpp           |   3 +-
 .../test/CodeGen/attr-target-clones-aarch64.c | 126 +++++++++++++++---
 2 files changed, 107 insertions(+), 22 deletions(-)

diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index c31f2e0bee543..23090dad83ad7 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -649,7 +649,8 @@ bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
   return llvm::StringSwitch<bool>(Feature)
       .Cases("aarch64", "arm64", "arm", true)
       .Case("fmv", HasFMV)
-      .Cases("neon", "fp", "simd", FPU & NeonMode)
+      .Case("fp", FPU & FPUMode)
+      .Cases("neon", "simd", FPU & NeonMode)
       .Case("jscvt", HasJSCVT)
       .Case("fcma", HasFCMA)
       .Case("rng", HasRandGen)
diff --git a/clang/test/CodeGen/attr-target-clones-aarch64.c b/clang/test/CodeGen/attr-target-clones-aarch64.c
index 3f2f2fdd24e8a..4404dd0da8e5e 100644
--- a/clang/test/CodeGen/attr-target-clones-aarch64.c
+++ b/clang/test/CodeGen/attr-target-clones-aarch64.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-attributes --check-globals --include-generated-funcs
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -S -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -S -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature -fp-armv8 -target-feature -fmv -S -emit-llvm -o - %s | FileCheck %s -check-prefix=CHECK-NOFMV
 
 int __attribute__((target_clones("lse+aes", "sve2"))) ftc(void) { return 0; }
 int __attribute__((target_clones("sha2", "sha2+memtag2", " default "))) ftc_def(void) { return 1; }
@@ -22,6 +22,8 @@ int __attribute__((target_clones("default"))) main() {
 inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))) ftc_inline2(void) { return 2; };
 
 
+
+//.
 // CHECK: @__aarch64_cpu_features = external dso_local global { i64 }
 // CHECK: @ftc.ifunc = weak_odr ifunc i32 (), ptr @ftc.resolver
 // CHECK: @ftc_def.ifunc = weak_odr ifunc i32 (), ptr @ftc_def.resolver
@@ -30,19 +32,25 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK: @ftc_inline1.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline1.resolver
 // CHECK: @ftc_inline2.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline2.resolver
 // CHECK: @ftc_inline3.ifunc = weak_odr ifunc i32 (), ptr @ftc_inline3.resolver
-
+//.
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc._MlseMaes(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 0
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc._Msve2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 0
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 0
+//
+//
 // CHECK-LABEL: @ftc.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -63,18 +71,26 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc._Msve2
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_def._Msha2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_def._Msha2Mmemtag2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_def(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK-LABEL: @ftc_def.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -95,14 +111,20 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_def._Msha2
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_def
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup1._Msha2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup1(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK-LABEL: @ftc_dup1.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -115,18 +137,26 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_dup1._Msha2
 // CHECK:       resolver_else:
 // CHECK-NEXT:    ret ptr @ftc_dup1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup2._Mfp(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup2._MdotprodMcrc(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_dup2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK-LABEL: @ftc_dup2.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -147,6 +177,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_dup2._Mfp
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_dup2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @foo(
 // CHECK-NEXT:  entry:
@@ -158,10 +190,14 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    [[CALL4:%.*]] = call i32 @ftc_dup2.ifunc()
 // CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NEXT:    ret i32 [[ADD5]]
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_direct(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 4
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @main(
 // CHECK-NEXT:  entry:
@@ -175,6 +211,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    [[CALL4:%.*]] = call i32 @ftc_direct()
 // CHECK-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NEXT:    ret i32 [[ADD5]]
+//
+//
 // CHECK-LABEL: @ftc_inline1.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -203,6 +241,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_inline1._MrngMsimd
 // CHECK:       resolver_else4:
 // CHECK-NEXT:    ret ptr @ftc_inline1
+//
+//
 // CHECK-LABEL: @ftc_inline2.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -223,6 +263,8 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_inline2._Mfp16
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_inline2
+//
+//
 // CHECK-LABEL: @ftc_inline3.resolver(
 // CHECK-NEXT:  resolver_entry:
 // CHECK-NEXT:    call void @__init_cpu_features_resolver()
@@ -243,62 +285,92 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NEXT:    ret ptr @ftc_inline3._Mbti
 // CHECK:       resolver_else2:
 // CHECK-NEXT:    ret ptr @ftc_inline3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1._MrngMsimd(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1._MrcpcMpredres(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1._Msve2-aesMwfxt(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline1(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 1
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline2._Mfp16(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline2._MfcmaMsve2-bitperm(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline2(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 2
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline3._Mbti(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline3._MsveMsb(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK: Function Attrs: noinline nounwind optnone
 // CHECK-LABEL: @ftc_inline3(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    ret i32 3
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 0
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_def(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 1
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_dup1(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 2
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_dup2(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 3
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @foo(
 // CHECK-NOFMV-NEXT:  entry:
@@ -310,10 +382,14 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NOFMV-NEXT:    [[CALL4:%.*]] = call i32 @ftc_dup2()
 // CHECK-NOFMV-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NOFMV-NEXT:    ret i32 [[ADD5]]
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @ftc_direct(
 // CHECK-NOFMV-NEXT:  entry:
 // CHECK-NOFMV-NEXT:    ret i32 4
+//
+//
 // CHECK-NOFMV: Function Attrs: noinline nounwind optnone
 // CHECK-NOFMV-LABEL: @main(
 // CHECK-NOFMV-NEXT:  entry:
@@ -327,21 +403,29 @@ inline int __attribute__((target_clones("fp16", "sve2-bitperm+fcma", "default"))
 // CHECK-NOFMV-NEXT:    [[CALL4:%.*]] = call i32 @ftc_direct()
 // CHECK-NOFMV-NEXT:    [[ADD5:%.*]] = add nsw i32 [[ADD3]], [[CALL4]]
 // CHECK-NOFMV-NEXT:    ret i32 [[ADD5]]
-
-// CHECK: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+lse,+neon" }
-// CHECK: attributes #1 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2" }
-// CHECK: attributes #2 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #3 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+sha2" }
-// CHECK: attributes #4 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+mte,+neon,+sha2" }
-// CHECK: attributes #5 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon" }
-// CHECK: attributes #6 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+fp-armv8,+neon" }
-// CHECK: attributes #7 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+neon,+rand" }
-// CHECK: attributes #8 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc" }
-// CHECK: attributes #9 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt" }
-// CHECK: attributes #10 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon" }
-// CHECK: attributes #11 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fp-armv8,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm" }
-// CHECK: attributes #12 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti" }
-// CHECK: attributes #13 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fp-armv8,+fullfp16,+neon,+sb,+sve" }
-
-// CHECK-NOFMV: attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
-// CHECK-NOFMV: attributes #1 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv" }
+//
+//.
+// CHECK: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+lse,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR1:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,-fp-armv8" }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fp-armv8" }
+// CHECK: attributes #[[ATTR3:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR4:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+mte,+neon,+sha2,-fp-armv8" }
+// CHECK: attributes #[[ATTR5:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR6:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+crc,+dotprod,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR7:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+neon,+rand,-fp-armv8" }
+// CHECK: attributes #[[ATTR8:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+predres,+rcpc,-fp-armv8" }
+// CHECK: attributes #[[ATTR9:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sve,+sve2,+sve2-aes,+wfxt,-fp-armv8" }
+// CHECK: attributes #[[ATTR10:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,-fp-armv8" }
+// CHECK: attributes #[[ATTR11:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+complxnum,+fullfp16,+neon,+sve,+sve2,+sve2-bitperm,-fp-armv8" }
+// CHECK: attributes #[[ATTR12:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+bti,-fp-armv8" }
+// CHECK: attributes #[[ATTR13:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+fullfp16,+neon,+sb,+sve,-fp-armv8" }
+//.
+// CHECK-NOFMV: attributes #[[ATTR0:[0-9]+]] = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv,-fp-armv8" }
+// CHECK-NOFMV: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="-fmv,-fp-armv8" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// CHECK-NOFMV: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK-NOFMV: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.

>From e541ff2c5fc1aaf9c924a2d39054c4eed46ca98d Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 23 Nov 2023 11:29:28 +0000
Subject: [PATCH 2/6] [AArch64] Add a soft-float ABI

This patch just adds the ABI enum and tests existing behaviour, it
doesn't make any functional changes yet.

I've not added an option to turn this on, instead it is automatically
selected if the target does not have FP registers. This is because the
vast majority of existing AArch64 hardware does have an FPU, so we
expect this to only be used when abbsoultely necessary, unlike AArch32
where both ABIs were commonly used.

Change-Id: I6f3a4562ca60c31b49a926cdce1881418677c604
---
 clang/lib/CodeGen/CodeGenModule.cpp         |  2 ++
 clang/lib/CodeGen/TargetInfo.h              |  1 +
 clang/test/CodeGen/aarch64-soft-float-abi.c | 15 +++++++++++++++
 3 files changed, 18 insertions(+)
 create mode 100644 clang/test/CodeGen/aarch64-soft-float-abi.c

diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index dea58a7ff4146..2e730fdb0b83f 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -143,6 +143,8 @@ createTargetCodeGenInfo(CodeGenModule &CGM) {
       Kind = AArch64ABIKind::DarwinPCS;
     else if (Triple.isOSWindows())
       return createWindowsAArch64TargetCodeGenInfo(CGM, AArch64ABIKind::Win64);
+    else if (!Target.hasFeature("fp"))
+      Kind = AArch64ABIKind::AAPCSSoft;
 
     return createAArch64TargetCodeGenInfo(CGM, Kind);
   }
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index 0c0781a2d5ab9..0b69d92b70cee 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -416,6 +416,7 @@ enum class AArch64ABIKind {
   AAPCS = 0,
   DarwinPCS,
   Win64,
+  AAPCSSoft,
 };
 
 std::unique_ptr<TargetCodeGenInfo>
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
new file mode 100644
index 0000000000000..2ff9de8d34bd1
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -0,0 +1,15 @@
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+
+// Floats are passed in integer registers, this will be handled by the backend.
+// CHECK: define dso_local half @test0(half noundef %a)
+// CHECK: define dso_local bfloat @test1(bfloat noundef %a)
+// CHECK: define dso_local float @test2(float noundef %a)
+// CHECK: define dso_local double @test3(double noundef %a)
+// CHECK: define dso_local fp128 @test4(fp128 noundef %a)
+__fp16 test0(__fp16 a) { return a; }
+__bf16 test1(__bf16 a) { return a; }
+float test2(float a) { return a; }
+double test3(double a) { return a; }
+long double test4(long double a) { return a; }
+

>From 8891380d65f168eb66e92bd453448b83a7865aef Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 23 Nov 2023 11:31:52 +0000
Subject: [PATCH 3/6] [AArch64] Disable HFAs/HVAs for soft-float

If we can't pass them in FP registers, then homogeneous floating-point
and vector aggregates should be treated like any other composite type,
and passed either in registers or on the stack.

Change-Id: Icd56e122ad586462d6059069f923ffca4b32a8d2
---
 clang/lib/CodeGen/Targets/AArch64.cpp       |  5 +++++
 clang/test/CodeGen/aarch64-soft-float-abi.c | 24 +++++++++++++++++++--
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index be5145daa00b7..e290e8761a5f4 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -478,6 +478,11 @@ bool AArch64SwiftABIInfo::isLegalVectorType(CharUnits VectorSize,
 }
 
 bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
+  // For the soft-float ABI variant, no types are considered to be homogeneous
+  // aggregates.
+  if (Kind == AArch64ABIKind::AAPCSSoft)
+    return false;
+
   // Homogeneous aggregates for AAPCS64 must have base types of a floating
   // point type or a short-vector type. This is the same as the 32-bit ABI,
   // but with the difference that any floating-point type is allowed,
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
index 2ff9de8d34bd1..0d80e82e6aef3 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -1,5 +1,5 @@
-// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
-// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK
+// RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
+// RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
 
 // Floats are passed in integer registers, this will be handled by the backend.
 // CHECK: define dso_local half @test0(half noundef %a)
@@ -13,3 +13,23 @@ float test2(float a) { return a; }
 double test3(double a) { return a; }
 long double test4(long double a) { return a; }
 
+// No types are considered to be HFAs or HVAs by the soft-float PCS, so these
+// are converted to integer types.
+struct A {
+  float x;
+};
+// SOFT: define dso_local i32 @test10(i64 %a.coerce)
+// HARD: define dso_local %struct.A @test10([1 x float] alignstack(8) %a.coerce)
+struct A test10(struct A a) { return a; }
+
+struct B {
+  double x;
+  double y;
+};
+// SOFT: define dso_local [2 x i64] @test11([2 x i64] %a.coerce)
+// HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
+struct B test11(struct B a) { return a; }
+
+// Vector types are only available for targets with the correct hardware, and
+// their calling-convention is left undefined by the soft-float ABI, so they
+// aren't tested here.

>From b68ed60a3fe78bc0e9f57b45bed9dd78ed851904 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Thu, 23 Nov 2023 11:31:28 +0000
Subject: [PATCH 4/6] [AArch64] Fix va_arg for soft-float

The AArch64 back-end already avoids saving the FP registers to the
va_list when the FP registers aren't present, but clang also needs to
know not to load them from the FP register save area when generating
code for va_arg.

The layout of va_list remains the same, but the vr_top and vr_offs
fields are unused.

Change-Id: I5d3dee1ac4a29f189432957910662939b79d9329
---
 clang/lib/CodeGen/Targets/AArch64.cpp       | 12 +++++++-----
 clang/test/CodeGen/aarch64-soft-float-abi.c | 18 ++++++++++++++++++
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index e290e8761a5f4..a4089bb6c70f1 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -53,8 +53,8 @@ class AArch64ABIInfo : public ABIInfo {
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
                           CodeGenFunction &CGF) const;
 
-  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                         CodeGenFunction &CGF) const;
+  Address EmitAAPCSVAArg(Address VAListAddr, QualType Ty, CodeGenFunction &CGF,
+                         AArch64ABIKind Kind) const;
 
   Address EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
                     QualType Ty) const override {
@@ -65,7 +65,7 @@ class AArch64ABIInfo : public ABIInfo {
 
     return Kind == AArch64ABIKind::Win64 ? EmitMSVAArg(CGF, VAListAddr, Ty)
            : isDarwinPCS()               ? EmitDarwinVAArg(VAListAddr, Ty, CGF)
-                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF);
+                                         : EmitAAPCSVAArg(VAListAddr, Ty, CGF, Kind);
   }
 
   Address EmitMSVAArg(CodeGenFunction &CGF, Address VAListAddr,
@@ -514,7 +514,8 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
 }
 
 Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
-                                       CodeGenFunction &CGF) const {
+                                       CodeGenFunction &CGF,
+                                       AArch64ABIKind Kind) const {
   ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
                                        CGF.CurFnInfo->getCallingConvention());
   // Empty records are ignored for parameter passing purposes.
@@ -539,7 +540,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+  bool IsFPR = Kind == AArch64ABIKind::AAPCS &&
+               (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
index 0d80e82e6aef3..687647d650e1d 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
 // RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
 
+// The va_list type does not change between the ABIs
+// CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+
 // Floats are passed in integer registers, this will be handled by the backend.
 // CHECK: define dso_local half @test0(half noundef %a)
 // CHECK: define dso_local bfloat @test1(bfloat noundef %a)
@@ -30,6 +33,21 @@ struct B {
 // HARD: define dso_local %struct.B @test11([2 x double] alignstack(8) %a.coerce)
 struct B test11(struct B a) { return a; }
 
+#include <stdarg.h>
+
+// For variadic arguments, va_arg will always retreive 
+// CHECK-LABEL: define dso_local double @test20(i32 noundef %a, ...)
+// CHECK: %vl = alloca %struct.__va_list, align 8
+// SOFT: %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 3
+// SOFT: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 1
+// HARD: %vr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 4
+// HARD: %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i32 0, i32 2
+double test20(int a, ...) {
+  va_list vl;
+  va_start(vl, a);
+  return va_arg(vl, double);
+}
+
 // Vector types are only available for targets with the correct hardware, and
 // their calling-convention is left undefined by the soft-float ABI, so they
 // aren't tested here.

>From cfe16982d735f376e95e59ef7c82622ff683540c Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 6 Dec 2023 09:34:49 +0000
Subject: [PATCH 5/6] [AArch64] Add a backend test for the soft-float ABI

---
 clang/test/CodeGen/aarch64-soft-float-abi.c |   3 +
 llvm/test/CodeGen/AArch64/soft-float-abi.ll | 161 ++++++++++++++++++++
 2 files changed, 164 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/soft-float-abi.ll

diff --git a/clang/test/CodeGen/aarch64-soft-float-abi.c b/clang/test/CodeGen/aarch64-soft-float-abi.c
index 687647d650e1d..3eed6eaea6813 100644
--- a/clang/test/CodeGen/aarch64-soft-float-abi.c
+++ b/clang/test/CodeGen/aarch64-soft-float-abi.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -triple aarch64 -target-feature +fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD
 // RUN: %clang_cc1 -triple aarch64 -target-feature -fp-armv8 -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,SOFT
 
+// See also llvm/test/CodeGen/AArch64/soft-float-abi.ll, which checks the LLVM
+// backend parts of the soft-float ABI.
+
 // The va_list type does not change between the ABIs
 // CHECK: %struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
 
diff --git a/llvm/test/CodeGen/AArch64/soft-float-abi.ll b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
new file mode 100644
index 0000000000000..291c3875c2488
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/soft-float-abi.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc --mtriple aarch64-none-eabi < %s -mattr=-fp-armv8 | FileCheck %s
+
+; See also clang/test/CodeGen/aarch64-soft-float-abi.c, which tests the clang
+; parts of the soft-float ABI.
+
+; FP types up to 64-bit are passed in a general purpose register.
+define half @test0(half %a, half %b)  {
+; CHECK-LABEL: test0:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret half %b
+}
+
+define bfloat @test1(i32 %a, bfloat %b) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret bfloat %b
+}
+
+define float @test2(i64 %a, float %b) {
+; CHECK-LABEL: test2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov w0, w1
+; CHECK-NEXT:    ret
+entry:
+  ret float %b
+}
+
+define double @test3(half %a, double %b) {
+; CHECK-LABEL: test3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x1
+; CHECK-NEXT:    ret
+entry:
+  ret double %b
+}
+
+; fp128 is passed in a pair of GPRs.
+define fp128 @test4(fp128 %a, fp128 %b) {
+; CHECK-LABEL: test4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; fp128 is passed in an aligned pair of GPRs, leaving one register unused is
+; necessary.
+define fp128 @test5(float %a, fp128 %b) {
+; CHECK-LABEL: test5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x1, x3
+; CHECK-NEXT:    mov x0, x2
+; CHECK-NEXT:    ret
+entry:
+  ret fp128 %b
+}
+
+; If the alignment of an fp128 leaves a register unused, it remains unused even
+; if a later argument could fit in it.
+define i64 @test6(i64 %a, fp128 %b, i64 %c) {
+; CHECK-LABEL: test6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mov x0, x4
+; CHECK-NEXT:    ret
+entry:
+  ret i64 %c
+}
+
+; HFAs are all bit-casted to integer types in the frontend when using the
+; soft-float ABI, so they get passed in the same way as non-homeogeneous
+; aggregates. The IR is identical to the equivalent integer types, so nothing
+; to test here.
+
+; The PCS for vector and HVA types is not defined by the soft-float ABI because
+; these types are only defined by the ACLE when vector hardware is available,
+; so nothing to test here.
+
+; The front-end generates IR for va_arg which always reads from the integer
+; register save area, and never the floating-point register save area. The
+; layout of the va_list type remains the same, the floating-point related
+; fields are unused. The only change needed in the backend is  in va_start, to
+; not attempt to save the floating-point registers or set the FP fields in the
+; va_list.
+%struct.__va_list = type { ptr, ptr, ptr, i32, i32 }
+declare void @llvm.va_start(ptr)
+define double @test20(i32 %a, ...) {
+; CHECK-LABEL: test20:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #96
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    mov w8, #-56 // =0xffffffc8
+; CHECK-NEXT:    add x10, sp, #8
+; CHECK-NEXT:    add x9, sp, #96
+; CHECK-NEXT:    str x8, [sp, #88]
+; CHECK-NEXT:    add x10, x10, #56
+; CHECK-NEXT:    ldrsw x8, [sp, #88]
+; CHECK-NEXT:    stp x1, x2, [sp, #8]
+; CHECK-NEXT:    stp x3, x4, [sp, #24]
+; CHECK-NEXT:    stp x5, x6, [sp, #40]
+; CHECK-NEXT:    stp x7, x9, [sp, #56]
+; CHECK-NEXT:    str x10, [sp, #72]
+; CHECK-NEXT:    tbz w8, #31, .LBB7_3
+; CHECK-NEXT:  // %bb.1: // %vaarg.maybe_reg
+; CHECK-NEXT:    add w9, w8, #8
+; CHECK-NEXT:    cmn w8, #8
+; CHECK-NEXT:    str w9, [sp, #88]
+; CHECK-NEXT:    b.gt .LBB7_3
+; CHECK-NEXT:  // %bb.2: // %vaarg.in_reg
+; CHECK-NEXT:    ldr x9, [sp, #72]
+; CHECK-NEXT:    add x8, x9, x8
+; CHECK-NEXT:    b .LBB7_4
+; CHECK-NEXT:  .LBB7_3: // %vaarg.on_stack
+; CHECK-NEXT:    ldr x8, [sp, #64]
+; CHECK-NEXT:    add x9, x8, #8
+; CHECK-NEXT:    str x9, [sp, #64]
+; CHECK-NEXT:  .LBB7_4: // %vaarg.end
+; CHECK-NEXT:    ldr x0, [x8]
+; CHECK-NEXT:    add sp, sp, #96
+; CHECK-NEXT:    ret
+entry:
+  %vl = alloca %struct.__va_list, align 8
+  call void @llvm.va_start(ptr nonnull %vl)
+  %gr_offs_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 3
+  %gr_offs = load i32, ptr %gr_offs_p, align 8
+  %0 = icmp sgt i32 %gr_offs, -1
+  br i1 %0, label %vaarg.on_stack, label %vaarg.maybe_reg
+
+vaarg.maybe_reg:                                  ; preds = %entry
+  %new_reg_offs = add nsw i32 %gr_offs, 8
+  store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+  %inreg = icmp slt i32 %gr_offs, -7
+  br i1 %inreg, label %vaarg.in_reg, label %vaarg.on_stack
+
+vaarg.in_reg:                                     ; preds = %vaarg.maybe_reg
+  %reg_top_p = getelementptr inbounds %struct.__va_list, ptr %vl, i64 0, i32 1
+  %reg_top = load ptr, ptr %reg_top_p, align 8
+  %1 = sext i32 %gr_offs to i64
+  %2 = getelementptr inbounds i8, ptr %reg_top, i64 %1
+  br label %vaarg.end
+
+vaarg.on_stack:                                   ; preds = %vaarg.maybe_reg, %entry
+  %stack = load ptr, ptr %vl, align 8
+  %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+  store ptr %new_stack, ptr %vl, align 8
+  br label %vaarg.end
+
+vaarg.end:                                        ; preds = %vaarg.on_stack, %vaarg.in_reg
+  %vaargs.addr = phi ptr [ %2, %vaarg.in_reg ], [ %stack, %vaarg.on_stack ]
+  %3 = load double, ptr %vaargs.addr, align 8
+  ret double %3
+}
+

>From 5ee37ae18fccde63d772f7036ae665feae30dd79 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 6 Dec 2023 09:38:48 +0000
Subject: [PATCH 6/6] [AArch64] Invert check for soft-float ABI (NFC)

If any other AArch64ABIKinds reach here in future they will use
floating-point registers, which is a better default.
---
 clang/lib/CodeGen/Targets/AArch64.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index a4089bb6c70f1..4d0035307d1af 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -540,7 +540,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
     BaseTy = ArrTy->getElementType();
     NumRegs = ArrTy->getNumElements();
   }
-  bool IsFPR = Kind == AArch64ABIKind::AAPCS &&
+  bool IsFPR = Kind != AArch64ABIKind::AAPCSSoft &&
                (BaseTy->isFloatingPointTy() || BaseTy->isVectorTy());
 
   // The AArch64 va_list type and handling is specified in the Procedure Call