[llvm] r364909 - [ARM] Stop using scalar FP instructions in integer-only MVE mode.

Tue Jul 2 04:26:00 PDT 2019

Author: statham
Date: Tue Jul  2 04:26:00 2019
New Revision: 364909

URL: http://llvm.org/viewvc/llvm-project?rev=364909&view=rev
Log:
[ARM] Stop using scalar FP instructions in integer-only MVE mode.

If you compile with `-mattr=+mve` (enabling integer MVE instructions
but not floating-point ones), then the scalar FP //registers// exist
and it's legal to move things in and out of them, load and store them,
but it's not legal to do arithmetic on them.

In D60708, the calls to `addRegisterClass` in ARMISelLowering that
enable use of the scalar FP registers became conditionalised on
`Subtarget->hasFPRegs()` instead of `Subtarget->hasVFP2Base()`, so
that loads, stores and moves of those registers would work. But I
didn't realise that that would also enable all the operations on those
types by default.

Now, if the target doesn't have basic VFP, we follow up those
`addRegisterClass` calls by turning back off all the nontrivial
operations you can perform on f32 and f64. That causes several
knock-on failures, which are fixed by allowing the `VMOVDcc` and
`VMOVScc` instructions to be selected even if all you have is
`HasFPRegs`, and adjusting several checks for 'is this a double in a
single-precision-only world?' to the more general 'is this any FP type
we can't do arithmetic on?'. Between those, the whole of the
`float-ops.ll` and `fp16-instructions.ll` tests can now run in
MVE-without-FP mode and generate correct-looking code.

One odd side effect is that I had to relax the check lines in that
test so that they permit test functions like `add_f` to be generated
as tailcalls to software FP library functions, instead of ordinary
calls. Doing that is entirely legal, but the mystery is why this is
the first RUN line that's needed the relaxation: on the usual kind of
non-FP target, no tailcalls ever seem to be generated. Going by the
llc messages, I think `SoftenFloatResult` must be perturbing the code
generation in some way, but that's as much as I can guess.

Reviewers: dmgreen, ostannard

Subscribers: javed.absar, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63938

Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
    llvm/trunk/test/CodeGen/Thumb2/float-ops.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=364909&r1=364908&r2=364909&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Tue Jul  2 04:26:00 2019
@@ -224,6 +224,13 @@ void ARMTargetLowering::addQRTypeForNEON
 void ARMTargetLowering::setAllExpand(MVT VT) {
   for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
     setOperationAction(Opc, VT, Expand);
+
+  // We support these really simple operations even on types where all
+  // the actual arithmetic has to be broken down into simpler
+  // operations or turned into library calls.
+  setOperationAction(ISD::BITCAST, VT, Legal);
+  setOperationAction(ISD::LOAD, VT, Legal);
+  setOperationAction(ISD::STORE, VT, Legal);
 }
 
 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
@@ -262,9 +269,6 @@ void ARMTargetLowering::addMVEVectorType
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
 
     if (HasMVEFP) {
       // No native support for these.
@@ -289,9 +293,6 @@ void ARMTargetLowering::addMVEVectorType
   for (auto VT : LongTypes) {
     addRegisterClass(VT, &ARM::QPRRegClass);
     setAllExpand(VT);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
   }
 
   // It is legal to extload from v4i8 to v4i16 or v4i32.
@@ -594,10 +595,14 @@ ARMTargetLowering::ARMTargetLowering(con
   else
     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
-      !Subtarget->isThumb1Only()) {
+  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
+      Subtarget->hasFPRegs()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+    if (!Subtarget->hasVFP2Base())
+      setAllExpand(MVT::f32);
+    if (!Subtarget->hasFP64())
+      setAllExpand(MVT::f64);
   }
 
   if (Subtarget->hasFullFP16()) {
@@ -4544,6 +4549,16 @@ static bool isLowerSaturatingConditional
   return false;
 }
 
+bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
+  if (VT == MVT::f32)
+    return !Subtarget->hasVFP2Base();
+  if (VT == MVT::f64)
+    return !Subtarget->hasFP64();
+  if (VT == MVT::f16)
+    return !Subtarget->hasFullFP16();
+  return false;
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDLoc dl(Op);
@@ -4587,9 +4602,9 @@ SDValue ARMTargetLowering::LowerSELECT_C
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
 
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4828,9 +4843,9 @@ SDValue ARMTargetLowering::LowerBR_CC(SD
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
 
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4975,7 +4990,7 @@ SDValue ARMTargetLowering::LowerFP_TO_IN
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::FP_TO_SINT)
       LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -5039,7 +5054,7 @@ SDValue ARMTargetLowering::LowerINT_TO_F
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(VT)) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::SINT_TO_FP)
       LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=364909&r1=364908&r2=364909&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Tue Jul  2 04:26:00 2019
@@ -794,6 +794,8 @@ class VectorType;
 
     bool shouldConsiderGEPOffsetSplit() const override { return true; }
 
+    bool isUnsupportedFloatingType(EVT VT) const;
+
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                     SDValue ARMcc, SDValue CCR, SDValue Cmp,
                     SelectionDAG &DAG) const;

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=364909&r1=364908&r2=364909&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Tue Jul  2 04:26:00 2019
@@ -2269,13 +2269,13 @@ def VMOVDcc  : PseudoInst<(outs DPR:$Dd)
                     IIC_fpUNA64,
                     [(set (f64 DPR:$Dd),
                           (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
-               RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+               RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
 
 def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
                     IIC_fpUNA32,
                     [(set (f32 SPR:$Sd),
                           (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
-               RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+               RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
 } // hasSideEffects
 
 //===----------------------------------------------------------------------===//

Modified: llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll?rev=364909&r1=364908&r2=364909&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll Tue Jul  2 04:26:00 2019
@@ -1,6 +1,8 @@
 ; SOFT:
 ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft   | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 
 ; SOFTFP:
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
@@ -206,8 +208,8 @@ for.end:
 
 ; CHECK-LABEL:            VCMPBRCC:
 
-; CHECK-SOFT:             bl  __aeabi_fcmpgt
-; CHECK-SOFT:             cmp r0, #0
+; CHECK-SOFT:             bl  __aeabi_fcmp{{gt|le}}
+; CHECK-SOFT:             cmp r0, #{{0|1}}
 
 ; CHECK-SOFTFP-FP16:      vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
 ; CHECK-SOFTFP-FP16:      vcmpe.f32 [[S2]], s0

Modified: llvm/trunk/test/CodeGen/Thumb2/float-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/float-ops.ll?rev=364909&r1=364908&r2=364909&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/float-ops.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/float-ops.ll Tue Jul  2 04:26:00 2019
@@ -1,12 +1,13 @@
-; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabi   -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -check-prefix=NOREGS
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VFP4-ALL
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=FP-ARMv8
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP4-ALL -check-prefix=VFP4-DP
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -check-prefix=ONLYREGS
 
 define float @add_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: add_f:
-; NONE: bl __aeabi_fadd
+; NONE: {{b|bl}} __aeabi_fadd
 ; HARD: vadd.f32  s0, s0, s1
   %0 = fadd float %a, %b
   ret float %0
@@ -15,8 +16,8 @@ entry:
 define double @add_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: add_d:
-; NONE: bl __aeabi_dadd
-; SP: bl __aeabi_dadd
+; NONE: {{b|bl}} __aeabi_dadd
+; SP: {{b|bl}} __aeabi_dadd
 ; DP: vadd.f64  d0, d0, d1
   %0 = fadd double %a, %b
   ret double %0
@@ -25,7 +26,7 @@ entry:
 define float @sub_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: sub_f:
-; NONE: bl __aeabi_fsub
+; NONE: {{b|bl}} __aeabi_fsub
 ; HARD: vsub.f32  s
   %0 = fsub float %a, %b
   ret float %0
@@ -34,8 +35,8 @@ entry:
 define double @sub_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: sub_d:
-; NONE: bl __aeabi_dsub
-; SP: bl __aeabi_dsub
+; NONE: {{b|bl}} __aeabi_dsub
+; SP: {{b|bl}} __aeabi_dsub
 ; DP: vsub.f64  d0, d0, d1
   %0 = fsub double %a, %b
   ret double %0
@@ -44,7 +45,7 @@ entry:
 define float @mul_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: mul_f:
-; NONE: bl __aeabi_fmul
+; NONE: {{b|bl}} __aeabi_fmul
 ; HARD: vmul.f32  s
   %0 = fmul float %a, %b
   ret float %0
@@ -53,8 +54,8 @@ entry:
 define double @mul_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: mul_d:
-; NONE: bl __aeabi_dmul
-; SP: bl __aeabi_dmul
+; NONE: {{b|bl}} __aeabi_dmul
+; SP: {{b|bl}} __aeabi_dmul
 ; DP: vmul.f64  d0, d0, d1
   %0 = fmul double %a, %b
   ret double %0
@@ -63,7 +64,7 @@ entry:
 define float @div_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: div_f:
-; NONE: bl __aeabi_fdiv
+; NONE: {{b|bl}} __aeabi_fdiv
 ; HARD: vdiv.f32  s
   %0 = fdiv float %a, %b
   ret float %0
@@ -72,8 +73,8 @@ entry:
 define double @div_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: div_d:
-; NONE: bl __aeabi_ddiv
-; SP: bl __aeabi_ddiv
+; NONE: {{b|bl}} __aeabi_ddiv
+; SP: {{b|bl}} __aeabi_ddiv
 ; DP: vdiv.f64  d0, d0, d1
   %0 = fdiv double %a, %b
   ret double %0
@@ -109,7 +110,8 @@ entry:
 define double @load_d(double* %a) {
 entry:
 ; CHECK-LABEL: load_d:
-; NONE: ldm r0, {r0, r1}
+; NOREGS: ldm r0, {r0, r1}
+; ONLYREGS: vldr d0, [r0]
 ; HARD: vldr d0, [r0]
   %0 = load double, double* %a, align 8
   ret double %0
@@ -127,7 +129,8 @@ entry:
 define void @store_d(double* %a, double %b) {
 entry:
 ; CHECK-LABEL: store_d:
-; NONE: strd r2, r3, [r0]
+; NOREGS: strd r2, r3, [r0]
+; ONLYREGS: vstr d0, [r0]
 ; HARD: vstr d0, [r0]
   store double %b, double* %a, align 8
   ret void
@@ -259,8 +262,10 @@ define i64 @bitcast_d_to_i(double %a) {
 
 define float @select_f(float %a, float %b, i1 %c) {
 ; CHECK-LABEL: select_f:
-; NONE: lsls    r2, r2, #31
-; NONE: moveq   r0, r1
+; NOREGS: lsls    r2, r2, #31
+; NOREGS: moveq   r0, r1
+; ONLYREGS: lsls    r2, r2, #31
+; ONLYREGS: vmovne.f32      s2, s0
 ; HARD: lsls    r0, r0, #31
 ; VFP4-ALL: vmovne.f32      s1, s0
 ; VFP4-ALL: vmov.f32        s0, s1
@@ -273,8 +278,8 @@ define double @select_d(double %a, doubl
 ; CHECK-LABEL: select_d:
 ; NONE: ldr{{(.w)?}}     [[REG:r[0-9]+]], [sp]
 ; NONE  ands    [[REG]], [[REG]], #1
-; NONE: moveq   r0, r2
-; NONE: moveq   r1, r3
+; NONE-DAG: moveq   r0, r2
+; NONE-DAG: moveq   r1, r3
 ; SP: ands r0, r0, #1
 ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
 ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1