[llvm] r323512 - [ARM] Armv8.2-A FP16 code generation (part 1/3)

Fri Jan 26 01:26:41 PST 2018

Author: sjoerdmeijer
Date: Fri Jan 26 01:26:40 2018
New Revision: 323512

URL: http://llvm.org/viewvc/llvm-project?rev=323512&view=rev
Log:
[ARM] Armv8.2-A FP16 code generation (part 1/3)

This is the groundwork for Armv8.2-A FP16 code generation.

Clang passes and returns _Float16 values as floats, together with the required
bitconverts and truncs etc. to implement correct AAPCS behaviour, see D42318.
We will implement half-precision argument passing/returning lowering in the ARM
backend soon, but for now this means that this:

_Float16 sub(_Float16 a, _Float16 b) {
  return a + b;
}

gets lowered to this:

define float @sub(float %a.coerce, float %b.coerce) {
entry:
  %0 = bitcast float %a.coerce to i32
  %tmp.0.extract.trunc = trunc i32 %0 to i16
  %1 = bitcast i16 %tmp.0.extract.trunc to half
  <SNIP>
  %add = fadd half %1, %3
  <SNIP>
}

When FullFP16 is *not* supported, we don't make f16 a legal type, and we get
legalization for "free", i.e. nothing changes and everything works as before.
And also f16 argument passing/returning is handled.

When FullFP16 is supported, we do make f16 a legal type, and have 2 places that
we need to patch up: f16 argument passing and returning, which involves minor
tweaks to avoid unnecessary code generation for some bitcasts.

As a "demonstrator" that this works for the different FP16, FullFP16, softfp
modes, etc., I've added match rules to the VSUB instruction description showing
that we can codegen this instruction from IR, but more importantly, also to
some conversion instructions. These conversions were causing issues before in
the FP16 and FullFP16 cases.

I've also added match rules to the VLDRH and VSTRH descriptions, so that we can
actually compile the entire half-precision sub code example above. This showed
that these loads and stores had the wrong addressing mode specified: AddrMode5
instead of AddrMode5FP16, which turned out not to be implemented at all, so that
has also been added.

This is the minimal patch that shows all the different moving parts. In patch
2/3 I will add some efficient lowering of bitcasts, and in 3/3 I will add the
remaining Armv8.2-A FP16 instruction descriptions.


Thanks to Sam Parker and Oliver Stannard for their help and reviews!


Differential Revision: https://reviews.llvm.org/D38315


Added:
    llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
Modified:
    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMCallingConv.td
    llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
    llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
    llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
    llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll

Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Fri Jan 26 01:26:40 2018
@@ -2409,6 +2409,14 @@ bool llvm::rewriteARMFrameIndex(MachineI
       NumBits = 8;
       Scale = 4;
       break;
+    case ARMII::AddrMode5FP16:
+      ImmIdx = FrameRegIdx+1;
+      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
+      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
+        InstrOffs *= -1;
+      NumBits = 8;
+      Scale = 2;
+      break;
     default:
       llvm_unreachable("Unsupported addressing mode!");
     }

Modified: llvm/trunk/lib/Target/ARM/ARMCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMCallingConv.td?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMCallingConv.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMCallingConv.td Fri Jan 26 01:26:40 2018
@@ -187,6 +187,7 @@ def RetCC_ARM_AAPCS : CallingConv<[
 
   CCIfType<[f64, v2f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
+
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
 
@@ -233,7 +234,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
   CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
-                                 S9, S10, S11, S12, S13, S14, S15]>>,
+                                      S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
 

Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Fri Jan 26 01:26:40 2018
@@ -118,8 +118,10 @@ public:
                        SDValue &Offset, SDValue &Opc);
   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                              SDValue &Offset, SDValue &Opc);
-  bool SelectAddrMode5(SDValue N, SDValue &Base,
-                       SDValue &Offset);
+  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
+                         int Lwb, int Upb, bool FP16);
+  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
+  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
 
@@ -886,8 +888,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3Off
   return true;
 }
 
-bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
-                                      SDValue &Base, SDValue &Offset) {
+bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
+                                        int Lwb, int Upb, bool FP16) {
   if (!CurDAG->isBaseWithConstantOffset(N)) {
     Base = N;
     if (N.getOpcode() == ISD::FrameIndex) {
@@ -907,8 +909,9 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SD
 
   // If the RHS is +/- imm8, fold into addr mode.
   int RHSC;
-  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
-                              -256 + 1, 256, RHSC)) {
+  const int Scale = FP16 ? 2 : 4;
+
+  if (isScaledConstantInRange(N.getOperand(1), Scale, Lwb, Upb, RHSC)) {
     Base = N.getOperand(0);
     if (Base.getOpcode() == ISD::FrameIndex) {
       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
@@ -921,17 +924,43 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SD
       AddSub = ARM_AM::sub;
       RHSC = -RHSC;
     }
-    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
-                                       SDLoc(N), MVT::i32);
+
+    if (FP16)
+      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
+                                         SDLoc(N), MVT::i32);
+    else
+      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
+                                         SDLoc(N), MVT::i32);
+
     return true;
   }
 
   Base = N;
-  Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
-                                     SDLoc(N), MVT::i32);
+
+  if (FP16)
+    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
+                                       SDLoc(N), MVT::i32);
+  else
+    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
+                                       SDLoc(N), MVT::i32);
+
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
+                                      SDValue &Base, SDValue &Offset) {
+  int Lwb = -256 + 1;
+  int Upb = 256;
+  return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ false);
+}
+
+bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
+                                          SDValue &Base, SDValue &Offset) {
+  int Lwb = -512 + 1;
+  int Upb = 512;
+  return IsAddressingMode5(N, Base, Offset, Lwb, Upb, /*FP16=*/ true);
+}
+
 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                       SDValue &Align) {
   Addr = N;

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Jan 26 01:26:40 2018
@@ -522,6 +522,13 @@ ARMTargetLowering::ARMTargetLowering(con
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
   }
 
+  if (Subtarget->hasFullFP16()) {
+    addRegisterClass(MVT::f16, &ARM::HPRRegClass);
+    // Clean up bitcast of incoming arguments if hard float abi is enabled.
+    if (Subtarget->isTargetHardFloat())
+      setOperationAction(ISD::BITCAST, MVT::i16, Custom);
+  }
+
   for (MVT VT : MVT::vector_valuetypes()) {
     for (MVT InnerVT : MVT::vector_valuetypes()) {
       setTruncStoreAction(VT, InnerVT, Expand);
@@ -2474,12 +2481,37 @@ ARMTargetLowering::LowerReturn(SDValue C
     assert(VA.isRegLoc() && "Can only return in registers!");
 
     SDValue Arg = OutVals[realRVLocIdx];
+    bool ReturnF16 = false;
+
+    if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
+      // Half-precision return values can be returned like this:
+      //
+      // t11 f16 = fadd ...
+      // t12: i16 = bitcast t11
+      //   t13: i32 = zero_extend t12
+      // t14: f32 = bitcast t13
+      //
+      // to avoid code generation for bitcasts, we simply set Arg to the node
+      // that produces the f16 value, t11 in this case.
+      //
+      if (Arg.getValueType() == MVT::f32) {
+        SDValue ZE = Arg.getOperand(0);
+        if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
+          SDValue BC = ZE.getOperand(0);
+          if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
+            Arg = BC.getOperand(0);
+            ReturnF16 = true;
+          }
+        }
+      }
+    }
 
     switch (VA.getLocInfo()) {
     default: llvm_unreachable("Unknown loc info!");
     case CCValAssign::Full: break;
     case CCValAssign::BCvt:
-      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
+      if (!ReturnF16)
+        Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
       break;
     }
 
@@ -2527,7 +2559,8 @@ ARMTargetLowering::LowerReturn(SDValue C
     // Guarantee that all emitted copies are
     // stuck together, avoiding something bad.
     Flag = Chain.getValue(1);
-    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+    RetOps.push_back(DAG.getRegister(VA.getLocReg(),
+                                     ReturnF16 ? MVT::f16 : VA.getLocVT()));
   }
   const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
   const MCPhysReg *I =
@@ -3684,7 +3717,10 @@ SDValue ARMTargetLowering::LowerFormalAr
       } else {
         const TargetRegisterClass *RC;
 
-        if (RegVT == MVT::f32)
+
+        if (RegVT == MVT::f16)
+          RC = &ARM::HPRRegClass;
+        else if (RegVT == MVT::f32)
           RC = &ARM::SPRRegClass;
         else if (RegVT == MVT::f64)
           RC = &ARM::DPRRegClass;
@@ -5024,6 +5060,37 @@ static SDValue ExpandBITCAST(SDNode *N,
   // source or destination of the bit convert.
   EVT SrcVT = Op.getValueType();
   EVT DstVT = N->getValueType(0);
+
+  // Half-precision arguments can be passed in like this:
+  //
+  //    t4: f32,ch = CopyFromReg t0, Register:f32 %1
+  //            t8: i32 = bitcast t4
+  //          t9: i16 = truncate t8
+  //        t10: f16 = bitcast t9   <~~~~ SDNode N
+  //
+  // but we want to avoid code generation for the bitcast, so transform this
+  // into:
+  //
+  // t18: f16 = CopyFromReg t0, Register:f32 %0
+  //
+  if (SrcVT == MVT::i16 && DstVT == MVT::f16) {
+     if (Op.getOpcode() != ISD::TRUNCATE)
+        return SDValue();
+
+    SDValue Bitcast = Op.getOperand(0);
+    if (Bitcast.getOpcode() != ISD::BITCAST ||
+        Bitcast.getValueType() != MVT::i32)
+      return SDValue();
+
+    SDValue Copy = Bitcast.getOperand(0);
+    if (Copy.getOpcode() != ISD::CopyFromReg ||
+        Copy.getValueType() != MVT::f32)
+      return SDValue();
+
+    SDValue Ops[] = { Copy->getOperand(0), Copy->getOperand(1) };
+    return DAG.getNode(ISD::CopyFromReg, SDLoc(Copy), MVT::f16, Ops);
+  }
+
   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
          "ExpandBITCAST called for non-i64 type");
 

Modified: llvm/trunk/lib/Target/ARM/ARMInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrFormats.td?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrFormats.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrFormats.td Fri Jan 26 01:26:40 2018
@@ -108,6 +108,7 @@ def AddrModeT2_so   : AddrMode<13>;
 def AddrModeT2_pc   : AddrMode<14>;
 def AddrModeT2_i8s4 : AddrMode<15>;
 def AddrMode_i12    : AddrMode<16>;
+def AddrMode5FP16   : AddrMode<17>;
 
 // Load / store index mode.
 class IndexMode<bits<2> val> {
@@ -1527,7 +1528,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod
 class AHI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
            InstrItinClass itin,
            string opc, string asm, list<dag> pattern>
-  : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
+  : VFPI<oops, iops, AddrMode5FP16, 4, IndexModeNone,
          VFPLdStFrm, itin, opc, asm, "", pattern> {
   list<Predicate> Predicates = [HasFullFP16];
 

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Fri Jan 26 01:26:40 2018
@@ -69,10 +69,19 @@ def vfp_f64imm : Operand<f64>,
   let ParserMatchClass = FPImmOperand;
 }
 
+def alignedload16 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+  return cast<LoadSDNode>(N)->getAlignment() >= 2;
+}]>;
+
 def alignedload32 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
   return cast<LoadSDNode>(N)->getAlignment() >= 4;
 }]>;
 
+def alignedstore16 : PatFrag<(ops node:$val, node:$ptr),
+                             (store node:$val, node:$ptr), [{
+  return cast<StoreSDNode>(N)->getAlignment() >= 2;
+}]>;
+
 def alignedstore32 : PatFrag<(ops node:$val, node:$ptr),
                              (store node:$val, node:$ptr), [{
   return cast<StoreSDNode>(N)->getAlignment() >= 4;
@@ -113,9 +122,9 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR
   let D = VFPNeonDomain;
 }
 
-def VLDRH : AHI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5fp16:$addr),
+def VLDRH : AHI5<0b1101, 0b01, (outs HPR:$Sd), (ins addrmode5fp16:$addr),
                  IIC_fpLoad16, "vldr", ".16\t$Sd, $addr",
-                 []>,
+                 [(set HPR:$Sd, (alignedload16 addrmode5fp16:$addr))]>,
             Requires<[HasFullFP16]>;
 
 } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in'
@@ -132,9 +141,9 @@ def VSTRS : ASI5<0b1101, 0b00, (outs), (
   let D = VFPNeonDomain;
 }
 
-def VSTRH : AHI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5fp16:$addr),
+def VSTRH : AHI5<0b1101, 0b00, (outs), (ins HPR:$Sd, addrmode5fp16:$addr),
                  IIC_fpStore16, "vstr", ".16\t$Sd, $addr",
-                 []>,
+                 [(alignedstore16 HPR:$Sd, addrmode5fp16:$addr)]>,
             Requires<[HasFullFP16]>;
 
 //===----------------------------------------------------------------------===//
@@ -335,9 +344,9 @@ def VADDS  : ASbIn<0b11100, 0b11, 0, 0,
 
 let TwoOperandAliasConstraint = "$Sn = $Sd" in
 def VADDH  : AHbI<0b11100, 0b11, 0, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                   IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
-                  []>,
+                  [(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
              Sched<[WriteFPALU32]>;
 
 let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -360,9 +369,9 @@ def VSUBS  : ASbIn<0b11100, 0b11, 1, 0,
 
 let TwoOperandAliasConstraint = "$Sn = $Sd" in
 def VSUBH  : AHbI<0b11100, 0b11, 1, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                   IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
-                  []>,
+                  [(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
             Sched<[WriteFPALU32]>;
 
 let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -658,17 +667,19 @@ def VCVTSD  : VFPAI<(outs SPR:$Sd), (ins
   let Predicates = [HasVFP2, HasDPVFP];
 }
 
-// Between half, single and double-precision.  For disassembly only.
-
+// Between half, single and double-precision.
 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>,
+                 [ /* intentionally left blank, see rule below */ ]>,
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
+def : Pat<(f32 (fpextend HPR:$Sm)),
+          (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+
 def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
-                 [/* For disassembly only; pattern left blank */]>,
+                 []>,
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 

Modified: llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMRegisterInfo.td Fri Jan 26 01:26:40 2018
@@ -307,6 +307,18 @@ def SPR : RegisterClass<"ARM", [f32], 32
   let DiagnosticString = "operand must be a register in range [s0, s31]";
 }
 
+def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
+  let AltOrders = [(add (decimate HPR, 2), SPR),
+                   (add (decimate HPR, 4),
+                        (decimate HPR, 2),
+                        (decimate (rotl HPR, 1), 4),
+                        (decimate (rotl HPR, 1), 2))];
+  let AltOrderSelect = [{
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+  }];
+  let DiagnosticString = "operand must be a register in range [s0, s31]";
+}
+
 // Subset of SPR which can be used as a source of NEON scalars for 16-bit
 // operations
 def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)> {

Modified: llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/ARM/Disassembler/ARMDisassembler.cpp Fri Jan 26 01:26:40 2018
@@ -158,6 +158,8 @@ static DecodeStatus DecoderGPRRegisterCl
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -996,6 +998,11 @@ static DecodeStatus DecodeSPRRegisterCla
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
 static const uint16_t DPRDecoderTable[] = {
      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,

Modified: llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h (original)
+++ llvm/trunk/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h Fri Jan 26 01:26:40 2018
@@ -186,7 +186,8 @@ namespace ARMII {
     AddrModeT2_so   = 13,
     AddrModeT2_pc   = 14, // +/- i12 for pc relative data
     AddrModeT2_i8s4 = 15, // i8 * 4
-    AddrMode_i12    = 16
+    AddrMode_i12    = 16,
+    AddrMode5FP16   = 17  // i8 * 2
   };
 
   inline static const char *AddrModeToString(AddrMode addrmode) {
@@ -197,6 +198,7 @@ namespace ARMII {
     case AddrMode3:       return "AddrMode3";
     case AddrMode4:       return "AddrMode4";
     case AddrMode5:       return "AddrMode5";
+    case AddrMode5FP16:   return "AddrMode5FP16";
     case AddrMode6:       return "AddrMode6";
     case AddrModeT1_1:    return "AddrModeT1_1";
     case AddrModeT1_2:    return "AddrModeT1_2";

Modified: llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll?rev=323512&r1=323511&r2=323512&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll Fri Jan 26 01:26:40 2018
@@ -43,7 +43,7 @@ define i17 @test_funny_ints(i17 %a, i17
 }
 
 define half @test_half(half %a, half %b) {
-; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)*
+; CHECK: remark: {{.*}} unable to lower arguments: half (half, half)* (in function: test_half)
 ; CHECK-LABEL: warning: Instruction selection used fallback path for test_half
   %res = fadd half %a, %b
   ret half %res

Added: llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll?rev=323512&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll Fri Jan 26 01:26:40 2018
@@ -0,0 +1,72 @@
+; SOFT:
+; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft     | FileCheck %s --check-prefix=CHECK-SOFT
+
+; SOFTFP:
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3        | FileCheck %s --check-prefix=CHECK-SOFTFP-VFP3
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp4        | FileCheck %s --check-prefix=CHECK-SOFTFP-FP16
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+fullfp16    | FileCheck %s --check-prefix=CHECK-SOFTFP-FULLFP16
+
+; HARD:
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp3      | FileCheck %s --check-prefix=CHECK-HARDFP-VFP3
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+vfp4      | FileCheck %s --check-prefix=CHECK-HARDFP-FP16
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16  | FileCheck %s --check-prefix=CHECK-HARDFP-FULLFP16
+
+define float @Add(float %a.coerce, float %b.coerce) local_unnamed_addr {
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %2 = bitcast float %b.coerce to i32
+  %tmp1.0.extract.trunc = trunc i32 %2 to i16
+  %3 = bitcast i16 %tmp1.0.extract.trunc to half
+  %add = fadd half %1, %3
+  %4 = bitcast half %add to i16
+  %tmp4.0.insert.ext = zext i16 %4 to i32
+  %5 = bitcast i32 %tmp4.0.insert.ext to float
+  ret float %5
+
+; CHECK-SOFT:  bl  __aeabi_h2f
+; CHECK-SOFT:  bl  __aeabi_h2f
+; CHECK-SOFT:  bl  __aeabi_fadd
+; CHECK-SOFT:  bl  __aeabi_f2h
+
+; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
+; CHECK-SOFTFP-VFP3:  bl  __aeabi_h2f
+; CHECK-SOFTFP-VFP3:  vadd.f32
+; CHECK-SOFTFP-VFP3:  bl  __aeabi_f2h
+
+; CHECK-SOFTFP-FP16:  vmov          [[S2:s[0-9]]], r1
+; CHECK-SOFTFP-FP16:  vmov          [[S0:s[0-9]]], r0
+; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S2]], [[S2]]
+; CHECK-SOFTFP-FP16:  vcvtb.f32.f16 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16:  vadd.f32      [[S0]], [[S0]], [[S2]]
+; CHECK-SOFTFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
+; CHECK-SOFTFP-FP16:  vmov  r0, s0
+
+; CHECK-SOFTFP-FULLFP16:  strh  r1, {{.*}}
+; CHECK-SOFTFP-FULLFP16:  strh  r0, {{.*}}
+; CHECK-SOFTFP-FULLFP16:  vldr.16 [[S0:s[0-9]]], {{.*}}
+; CHECK-SOFTFP-FULLFP16:  vldr.16 [[S2:s[0-9]]], {{.*}}
+; CHECK-SOFTFP-FULLFP16:  vadd.f16  [[S0]], [[S2]], [[S0]]
+; CHECK-SOFTFP-FULLFP16:  vstr.16 [[S2:s[0-9]]],  {{.*}}
+; CHECK-SOFTFP-FULLFP16:  ldrh  r0, {{.*}}
+; CHECK-SOFTFP-FULLFP16:  mov pc, lr
+
+; CHECK-HARDFP-VFP3:  vmov r{{.}}, s0
+; CHECK-HARDFP-VFP3:  vmov{{.*}}, s1
+; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
+; CHECK-HARDFP-VFP3:  bl  __aeabi_h2f
+; CHECK-HARDFP-VFP3:  vadd.f32
+; CHECK-HARDFP-VFP3:  bl  __aeabi_f2h
+; CHECK-HARDFP-VFP3:  vmov  s0, r0
+
+; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S2:s[0-9]]], s1
+; CHECK-HARDFP-FP16:  vcvtb.f32.f16 [[S0:s[0-9]]], s0
+; CHECK-HARDFP-FP16:  vadd.f32  [[S0]], [[S0]], [[S2]]
+; CHECK-HARDFP-FP16:  vcvtb.f16.f32 [[S0]], [[S0]]
+
+; CHECK-HARDFP-FULLFP16:       vadd.f16  s0, s0, s1
+; CHECK-HARDFP-FULLFP16-NEXT:  mov pc, lr
+
+}
+