[clang] 60ea6f3 - [ARM] Allow selecting hard-float ABI in integer-only MVE.

Wed Feb 1 01:05:23 PST 2023

Author: Simon Tatham
Date: 2023-02-01T09:05:12Z
New Revision: 60ea6f35a270d11c91770a2fc366888e7d3859f4

URL: https://github.com/llvm/llvm-project/commit/60ea6f35a270d11c91770a2fc366888e7d3859f4
DIFF: https://github.com/llvm/llvm-project/commit/60ea6f35a270d11c91770a2fc366888e7d3859f4.diff

LOG: [ARM] Allow selecting hard-float ABI in integer-only MVE.

Armv8.1-M can be configured to support the integer subset of the MVE
vector instructions, and no floating point. In that situation, the FP
and vector registers still exist, and so do the load, store and move
instructions that transfer data in and out of them. So there's no
reason the hard floating point ABI can't be supported, and you might
reasonably want to use it, for the sake of intrinsics-based code
passing explicit MVE vector types between functions.

But the selection of the hard float ABI in the backend was gated on
Subtarget->hasVFP2Base(), which is false in the case of integer MVE
and no FP.

As a result, you'd silently get the soft float ABI even if you
deliberately tried to select the hard float ABI, e.g. with clang
options such as
--target=arm-none-eabi -mfloat-abi=hard -march=armv8.1m.main+nofp+mve

The hard float ABI should have been gated on the weaker condition
Subtarget->hasFPRegs(), because the only requirement for being able to
pass arguments in the FP registers is that the registers themselves
should exist.

I haven't added a new test, because changing the existing
CodeGen/Thumb2/float-ops.ll test seemed sufficient. But I've added a
comment explaining why the results are expected to be what they are.

Reviewed By: lenary

Differential Revision: https://reviews.llvm.org/D142703

Added: 
    

Modified: 
    clang/docs/ReleaseNotes.rst
    llvm/docs/ReleaseNotes.rst
    llvm/lib/Target/ARM/ARMFastISel.cpp
    llvm/lib/Target/ARM/ARMISelLowering.cpp
    llvm/test/CodeGen/Thumb2/float-ops.ll

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index c6139252e0c34..2b2ca8b2987f0 100644

--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -163,6 +163,13 @@ DWARF Support in Clang
 Arm and AArch64 Support in Clang
 --------------------------------
 
+* The hard-float ABI is now available in Armv8.1-M configurations that
+  have integer MVE instructions (and therefore have FP registers) but
+  no scalar or vector floating point computation. Previously, trying
+  to select the hard-float ABI on such a target (via
+  ``-mfloat-abi=hard`` or a triple ending in ``hf``) would silently
+  use the soft-float ABI instead.
+
 Floating Point Support in Clang
 -------------------------------
 

diff  --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index d628257a76904..a3f02992048a4 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -71,6 +71,10 @@ Changes to the AMDGPU Backend
 Changes to the ARM Backend
 --------------------------
 
+- The hard-float ABI is now available in Armv8.1-M configurations that
+  have integer MVE instructions (and therefore have FP registers) but
+  no scalar or vector floating point computation.
+
 Changes to the AVR Backend
 --------------------------
 

diff  --git a/llvm/lib/Target/ARM/ARMFastISel.cpp b/llvm/lib/Target/ARM/ARMFastISel.cpp
index 62a090f4bca81..60a6e9ade9234 100644
--- a/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -1842,7 +1842,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   case CallingConv::CXX_FAST_TLS:
     // Use target triple & subtarget features to do actual dispatch.
     if (Subtarget->isAAPCS_ABI()) {
-      if (Subtarget->hasVFP2Base() &&
+      if (Subtarget->hasFPRegs() &&
           TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
       else

diff  --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 8a28e6b4e4fd2..07fa829731563 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -2081,7 +2081,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
   case CallingConv::Tail:
     if (!Subtarget->isAAPCS_ABI())
       return CallingConv::ARM_APCS;
-    else if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() &&
+    else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
              !isVarArg)
       return CallingConv::ARM_AAPCS_VFP;

diff  --git a/llvm/test/CodeGen/Thumb2/float-ops.ll b/llvm/test/CodeGen/Thumb2/float-ops.ll
index 51f18afaf0a46..d2b1dd6f05a3f 100644
--- a/llvm/test/CodeGen/Thumb2/float-ops.ll
+++ b/llvm/test/CodeGen/Thumb2/float-ops.ll
@@ -83,7 +83,7 @@ entry:
 define float @rem_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: rem_f:
-; NONE: bl fmodf
+; NONE: {{b|bl}} fmodf
 ; HARD: b fmodf
   %0 = frem float %a, %b
   ret float %0
@@ -92,16 +92,23 @@ entry:
 define double @rem_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: rem_d:
-; NONE: bl fmod
+; NONE: {{b|bl}} fmod
 ; HARD: b fmod
   %0 = frem double %a, %b
   ret double %0
 }
 
+; In the ONLYREGS case (where we have integer MVE but no floating
+; point), we still expect the hard float ABI, because we asked for it
+; in the triple, and since the FP registers exist, it's possible to
+; use them to pass arguments. So the generated code should load the
+; return value into s0, not r0. Similarly for the other load and store
+; tests.
 define float @load_f(ptr %a) {
 entry:
 ; CHECK-LABEL: load_f:
-; NONE: ldr r0, [r0]
+; NOREGS: ldr r0, [r0]
+; ONLYREGS: vldr s0, [r0]
 ; HARD: vldr s0, [r0]
   %0 = load float, ptr %a, align 4
   ret float %0
@@ -120,7 +127,8 @@ entry:
 define void @store_f(ptr %a, float %b) {
 entry:
 ; CHECK-LABEL: store_f:
-; NONE: str r1, [r0]
+; NOREGS: str r1, [r0]
+; ONLYREGS: vstr s0, [r0]
 ; HARD: vstr s0, [r0]
   store float %b, ptr %a, align 4
   ret void
@@ -130,7 +138,7 @@ define void @store_d(ptr %a, double %b) {
 entry:
 ; CHECK-LABEL: store_d:
 ; NOREGS: strd r2, r3, [r0]
-; ONLYREGS: strd r2, r3, [r0]
+; ONLYREGS: vstr d0, [r0]
 ; HARD: vstr d0, [r0]
   store double %b, ptr %a, align 8
   ret void
@@ -230,7 +238,8 @@ define double @ui_to_d(i32 %a) {
 
 define float @bitcast_i_to_f(i32 %a) {
 ; CHECK-LABEL: bitcast_i_to_f:
-; NONE-NOT: mov
+; NOREGS-NOT: mov
+; ONLYREGS: vmov s0, r0
 ; HARD: vmov s0, r0
   %1 = bitcast i32 %a to float
   ret float %1
@@ -238,15 +247,17 @@ define float @bitcast_i_to_f(i32 %a) {
 
 define double @bitcast_i_to_d(i64 %a) {
 ; CHECK-LABEL: bitcast_i_to_d:
-; NONE-NOT: mov
+; NOREGS-NOT: mov
+; ONLYREGS: vmov d0, r0, r1
 ; HARD: vmov d0, r0, r1
-  %1 = bitcast i64 %a to double
+ %1 = bitcast i64 %a to double
   ret double %1
 }
 
 define i32 @bitcast_f_to_i(float %a) {
 ; CHECK-LABEL: bitcast_f_to_i:
-; NONE-NOT: mov
+; NOREGS-NOT: mov
+; ONLYREGS: vmov r0, s0
 ; HARD: vmov r0, s0
   %1 = bitcast float %a to i32
   ret i32 %1
@@ -254,7 +265,8 @@ define i32 @bitcast_f_to_i(float %a) {
 
 define i64 @bitcast_d_to_i(double %a) {
 ; CHECK-LABEL: bitcast_d_to_i:
-; NONE-NOT: mov
+; NOREGS-NOT: mov
+; ONLYREGS: vmov r0, r1, d0
 ; HARD: vmov r0, r1, d0
   %1 = bitcast double %a to i64
   ret i64 %1
@@ -264,8 +276,8 @@ define float @select_f(float %a, float %b, i1 %c) {
 ; CHECK-LABEL: select_f:
 ; NOREGS: lsls    r2, r2, #31
 ; NOREGS: moveq   r0, r1
-; ONLYREGS: lsls    r2, r2, #31
-; ONLYREGS: vmovne.f32      s2, s0
+; ONLYREGS: lsls    r0, r0, #31
+; ONLYREGS: vmovne.f32      s1, s0
 ; HARD: lsls    r0, r0, #31
 ; VFP4-ALL: vmovne.f32      s1, s0
 ; VFP4-ALL: vmov.f32        s0, s1
@@ -276,8 +288,9 @@ define float @select_f(float %a, float %b, i1 %c) {
 
 define double @select_d(double %a, double %b, i1 %c) {
 ; CHECK-LABEL: select_d:
-; NONE: ldr{{(.w)?}}     [[REG:r[0-9]+]], [sp]
-; NONE: ands    [[REG]], [[REG]], #1
+; NOREGS: ldr{{(.w)?}}     [[REG:r[0-9]+]], [sp]
+; NOREGS: ands    [[REG]], [[REG]], #1
+; ONLYREGS: ands    r0, r0, #1
 ; NOREGS-DAG: moveq   r0, r2
 ; NOREGS-DAG: moveq   r1, r3
 ; ONLYREGS-DAG: csel   r0, r0, r2