[llvm] r324360 - [ARM] f16 conversions

Tue Feb 6 08:28:43 PST 2018

Author: sjoerdmeijer
Date: Tue Feb  6 08:28:43 2018
New Revision: 324360

URL: http://llvm.org/viewvc/llvm-project?rev=324360&view=rev
Log:
[ARM] f16 conversions

This is a follow-up to r324321, adding match patterns for f16 <-> f32 and
f16 <-> f64 conversions.

Differential Revision: https://reviews.llvm.org/D42954

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=324360&r1=324359&r2=324360&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Tue Feb  6 08:28:43 2018
@@ -675,19 +675,26 @@ def VCVTSD  : VFPAI<(outs SPR:$Sd), (ins
 // Between half, single and double-precision.
 def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm",
-                 [ /* intentionally left blank, see rule below */ ]>,
+                 [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
 def : FullFP16Pat<(f32 (fpextend HPR:$Sm)),
                   (VCVTBHS (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f16_to_fp GPR:$a),
+              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
 
 def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm",
-                 []>,
+                 [/* Intentionally left blank, see patterns below */]>,
                  Requires<[HasFP16]>,
              Sched<[WriteFPCVT]>;
 
+def : FullFP16Pat<(f16 (fpround SPR:$Sm)),
+                  (COPY_TO_REGCLASS (VCVTBSH SPR:$Sm), HPR)>;
+def : FP16Pat<(fp_to_f16 SPR:$a),
+              (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
+
 def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm),
                  /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm",
                  [/* For disassembly only; pattern left blank */]>,
@@ -703,7 +710,8 @@ def VCVTTSH: ASuI<0b11101, 0b11, 0b0011,
 def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
                    NoItinerary, "vcvtb", ".f64.f16\t$Dd, $Sm",
-                   []>, Requires<[HasFPARMv8, HasDPVFP]>,
+                   [/* Intentionally left blank, see patterns below */]>,
+                   Requires<[HasFPARMv8, HasDPVFP]>,
               Sched<[WriteFPCVT]> {
   // Instruction operands.
   bits<5> Sm;
@@ -713,10 +721,16 @@ def VCVTBHD : ADuI<0b11101, 0b11, 0b0010
   let Inst{5}   = Sm{0};
 }
 
+def : FullFP16Pat<(f64 (fpextend HPR:$Sm)),
+                  (VCVTBHD (COPY_TO_REGCLASS HPR:$Sm, SPR))>;
+def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
+              (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
+
 def VCVTBDH : ADuI<0b11101, 0b11, 0b0011, 0b01, 0,
                    (outs SPR:$Sd), (ins DPR:$Dm),
                    NoItinerary, "vcvtb", ".f16.f64\t$Sd, $Dm",
-                   []>, Requires<[HasFPARMv8, HasDPVFP]> {
+                   [/* Intentionally left blank, see patterns below */]>,
+                   Requires<[HasFPARMv8, HasDPVFP]> {
   // Instruction operands.
   bits<5> Sd;
   bits<5> Dm;
@@ -728,6 +742,11 @@ def VCVTBDH : ADuI<0b11101, 0b11, 0b0011
   let Inst{22}      = Sd{0};
 }
 
+def : FullFP16Pat<(f16 (fpround DPR:$Dm)),
+                  (COPY_TO_REGCLASS (VCVTBDH DPR:$Dm), HPR)>;
+def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
+              (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
+
 def VCVTTHD : ADuI<0b11101, 0b11, 0b0010, 0b11, 0,
                    (outs DPR:$Dd), (ins SPR:$Sm),
                    NoItinerary, "vcvtt", ".f64.f16\t$Dd, $Sm",
@@ -755,18 +774,6 @@ def VCVTTDH : ADuI<0b11101, 0b11, 0b0011
   let Inst{5}     = Dm{4};
 }
 
-def : FP16Pat<(fp_to_f16 SPR:$a),
-              (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>;
-
-def : FP16Pat<(fp_to_f16 (f64 DPR:$a)),
-              (i32 (COPY_TO_REGCLASS (VCVTBDH DPR:$a), GPR))>;
-
-def : FP16Pat<(f16_to_fp GPR:$a),
-              (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
-def : FP16Pat<(f64 (f16_to_fp GPR:$a)),
-              (VCVTBHD (COPY_TO_REGCLASS GPR:$a, SPR))>;
-
 multiclass vcvt_inst<string opc, bits<2> rm,
                      SDPatternOperator node = null_frag> {
   let PostEncoderMethod = "", DecoderNamespace = "VFPV8" in {

Modified: llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll?rev=324360&r1=324359&r2=324360&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll Tue Feb  6 08:28:43 2018
@@ -212,6 +212,51 @@ entry:
 ; CHECK-HARDFP-FULLFP16-NEXT:  vcvt.f16.s32  s0, s0
 }
 
+define i32 @f2h(float %f) {
+entry:
+  %conv = fptrunc float %f to half
+  %0 = bitcast half %conv to i16
+  %tmp.0.insert.ext = zext i16 %0 to i32
+  ret i32 %tmp.0.insert.ext
+
+; CHECK-LABEL:            f2h:
+; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f32 s0, s0
+}
+
+define float @h2f(i32 %h.coerce) {
+entry:
+  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
+  %0 = bitcast i16 %tmp.0.extract.trunc to half
+  %conv = fpext half %0 to float
+  ret float %conv
+
+; CHECK-LABEL:            h2f:
+; CHECK-HARDFP-FULLFP16:  vcvtb.f32.f16 s0, s0
+}
+
+
+define double @h2d(i32 %h.coerce) {
+entry:
+  %tmp.0.extract.trunc = trunc i32 %h.coerce to i16
+  %0 = bitcast i16 %tmp.0.extract.trunc to half
+  %conv = fpext half %0 to double
+  ret double %conv
+
+; CHECK-LABEL:            h2d:
+; CHECK-HARDFP-FULLFP16:  vcvtb.f64.f16 d{{.*}}, s{{.}}
+}
+
+define i32 @d2h(double %d) {
+entry:
+  %conv = fptrunc double %d to half
+  %0 = bitcast half %conv to i16
+  %tmp.0.insert.ext = zext i16 %0 to i32
+  ret i32 %tmp.0.insert.ext
+
+; CHECK-LABEL:            d2h:
+; CHECK-HARDFP-FULLFP16:  vcvtb.f16.f64 s0, d{{.*}}
+}
+
 ; TODO:
 ; 7.  VCVTA
 ; 8.  VCVTM