[PATCH][X86] Add ISel patterns to select 'fp32_to_fp16' and 'fp16_to_fp32' dag nodes.

Fri Jun 27 14:52:31 PDT 2014

ping.


Thanks,
Andrea

On Sat, Jun 21, 2014 at 1:25 AM, Andrea Di Biagio
<andrea.dibiagio at gmail.com> wrote:
> Hi Jim,
> sorry for the multiple emails...
>
> I think I now understand the problem in the code.
>
> This line:
>
> +  if (!TM.Options.UseSoftFloat && !Subtarget->hasF16C()) {
>
> should have been instead:
>
> +  if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
>
> Basically, the idea is:
> if -soft-float=1, then we always expand the float conversions into
> calls to the runtime library.
> Otherwise, we check for feature F16C; if F16C is not available, then we expand
> the float conversions into libcalls.
>
> I attached a new version of the patch that fixes the problem and adds two new
> tests in cvt16.ll to verify that we always emit libcalls when flag
> -soft-float=1 is specified.
>
> Please let me know what you think.
>
> Thanks!
> Andrea
>
> On Fri, Jun 20, 2014 at 11:59 PM, Andrea Di Biagio
> <andrea.dibiagio at gmail.com> wrote:
>> Do you think it would be ok if I just remove the check for soft-float?
>>
>>
>> On Fri, Jun 20, 2014 at 11:29 PM, Andrea Di Biagio
>> <andrea.dibiagio at gmail.com> wrote:
>>> Hi Jim,
>>>
>>> On Fri, Jun 20, 2014 at 9:32 PM, Jim Grosbach <grosbach at apple.com> wrote:
>>>> I agree calling library helpers is the correct fallback here.
>>>>
>>>> What’s the behavior with soft-float? Specifically,
>>>>
>>>> +  if (!TM.Options.UseSoftFloat && !Subtarget->hasF16C()) {
>>>
>>> I have just run some tests (using the test-case from this patch)
>>> with/without flag -float-abi=soft/hard.
>>> The presence/absence of that flag doesn't seem to affect the output.
>>>
>>> My original idea was to generate library calls for
>>> float-to-half-float conversions if 'soft' float-abi was specified.
>>> I thought that the presence of that flag would have affected the
>>> codegen for those dag nodes. But apparently I was wrong.
>>>
>>>>
>>>> -Jim
>>>>
>>>>> On Jun 19, 2014, at 6:53 AM, Andrea Di Biagio <andrea.dibiagio at gmail.com> wrote:
>>>>>
>>>>> Hi,
>>>>>
>>>>> this patch:
>>>>> 1) Adds tablegen patterns to select F16C float-to-half-float
>>>>> conversion instructions (and vice versa) from 'f32_to_f16' (and
>>>>> 'f16_to_f32') dag nodes;
>>>>>
>>>>> 2) Teaches the backend how to emit compiler runtime library calls to
>>>>> __gnu_f2h_ieee and __gnu_h2f_ieee if there is no F16C support.
>>>>>
>>>>> About point 2.
>>>>> I am not sure if there is a better way to fix/work around this
>>>>> problem. In general, my opinion is that the backend shouldn't raise an
>>>>> error if we don't have F16C and the DAG contains
>>>>> 'f32_to_f16/f16_to_f32' nodes.
>>>>>
>>>>> The compiler already knows about the existence of libcalls
>>>>> '__gnu_f2h_ieee' and '__gnu_h2f_ieee' (see TargetLoweringBase.cpp).
>>>>> So, my idea (if you agree) is to generate runtime calls instead of
>>>>> returning an ISel error.
>>>>> However, this could result later on in a linker error if the runtime
>>>>> library doesn't define '__gnu_f2h_ieee' and '__gnu_h2f_ieee'.
>>>>>
>>>>> Please let me know what you think.
>>>>>
>>>>> Thanks!
>>>>> Andrea Di Biagio
>>>>> <patch-f16c-conv.diff>
>>>>
-------------- next part --------------
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================

--- lib/Target/X86/X86ISelLowering.cpp	(revision 211962)
+++ lib/Target/X86/X86ISelLowering.cpp	(working copy)
@@ -509,6 +509,14 @@
     }
   }
 
+  // Special handling for half-precision floating point conversions.
+  // If we don't have F16C support, then lower half float conversions
+  // into library calls.
+  if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) {
+    setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
+    setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand);
+  }
+
   if (Subtarget->hasPOPCNT()) {
     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
   } else {
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td	(revision 211962)
+++ lib/Target/X86/X86InstrSSE.td	(working copy)
@@ -8538,6 +8538,21 @@
             (VCVTPH2PSrm addr:$src)>;
 }
 
+// Patterns for  matching conversions from float to half-float and vice versa.
+let Predicates = [HasF16C] in {
+  def : Pat<(f32_to_f16 FR32:$src),
+            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
+              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;
+
+  def : Pat<(f16_to_f32 GR16:$src),
+            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
+              (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >;
+
+  def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))),
+            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
+              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX2 Instructions
 //===----------------------------------------------------------------------===//
Index: test/CodeGen/X86/cvt16.ll
===================================================================
--- test/CodeGen/X86/cvt16.ll	(revision 0)
+++ test/CodeGen/X86/cvt16.ll	(working copy)
@@ -0,0 +1,64 @@
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=LIBCALL
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=F16C
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=-f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -soft-float=1 -mattr=+f16c | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFLOAT
+
+; This is a test for float to half float conversions on x86-64.
+;
+; If flag -soft-float is set, or if there is no F16C support, then:
+; 1) half float to float conversions are
+;    translated into calls to __gnu_h2f_ieee defined
+;    by the compiler runtime library;
+; 2) float to half float conversions are translated into calls
+;    to __gnu_f2h_ieee which is expected to be defined by the
+;    compiler runtime library.
+;
+; Otherwise (we have F16C support):
+; 1) half float to float conversions are translated using
+;    vcvtph2ps instructions;
+; 2) float to half float conversions are translated using
+;    vcvtps2ph instructions
+
+
+define void @test1(float %src, i16* %dest) {
+  %1 = tail call i16 @llvm.convert.to.fp16(float %src)
+  store i16 %1, i16* %dest, align 2
+  ret void
+}
+; CHECK-LABEL: test1
+; LIBCALL: callq  __gnu_f2h_ieee
+; SOFTFLOAT: callq  __gnu_f2h_ieee
+; F16C: vcvtps2ph
+; CHECK: ret
+
+
+define float @test2(i16* nocapture %src) {
+  %1 = load i16* %src, align 2
+  %2 = tail call float @llvm.convert.from.fp16(i16 %1)
+  ret float %2
+}
+; CHECK-LABEL: test2:
+; LIBCALL: jmp  __gnu_h2f_ieee
+; SOFTFLOAT: callq  __gnu_h2f_ieee
+; F16C: vcvtph2ps
+; F16C: ret
+
+
+define float @test3(float %src) nounwind uwtable readnone {
+  %1 = tail call i16 @llvm.convert.to.fp16(float %src)
+  %2 = tail call float @llvm.convert.from.fp16(i16 %1)
+  ret float %2
+}
+
+; CHECK-LABEL: test3:
+; LIBCALL: callq  __gnu_f2h_ieee
+; LIBCALL: jmp   __gnu_h2f_ieee
+; SOFTFLOAT: callq  __gnu_f2h_ieee
+; SOFTFLOAT: callq  __gnu_h2f_ieee
+; F16C: vcvtps2ph
+; F16C-NEXT: vcvtph2ps
+; F16C: ret
+
+declare float @llvm.convert.from.fp16(i16) nounwind readnone
+declare i16 @llvm.convert.to.fp16(float) nounwind readnone
+