[PATCH] D44675: [ARM] Codegen FP16 vmaxnm/vminnm scalar instructions

Tue Mar 20 03:30:04 PDT 2018

SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: samparker, olista01, t.p.northover.
Herald added subscribers: kristof.beyls, javed.absar.

https://reviews.llvm.org/D44675

Files:
  lib/Target/ARM/ARMISelLowering.cpp
  lib/Target/ARM/ARMInstrVFP.td
  test/CodeGen/ARM/fp16-instructions.ll


Index: test/CodeGen/ARM/fp16-instructions.ll
===================================================================

--- test/CodeGen/ARM/fp16-instructions.ll
+++ test/CodeGen/ARM/fp16-instructions.ll
@@ -28,6 +28,10 @@
 ; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -fp-contract=fast | FileCheck %s --check-prefixes=CHECK,CHECK-HARDFP-FULLFP16-FAST
 
+; FAST MATH
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mattr=+fullfp16 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=CHECK-FM
+; RUN: llc < %s -mtriple=arm-none-eabihf -mattr=+fullfp16 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=CHECK-FM
+
 
 define float @RetValBug(float %A.coerce) {
 entry:
@@ -477,9 +481,45 @@
 ; CHECK-HARDFP-FULLFP16-FAST-NEXT:  vmov.f32  s0, s2
 }
 
-; TODO:
 ; 17. VMAXNM
+define dso_local float @vmaxnm(float %a.coerce, float %b.coerce) {
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %2 = bitcast float %b.coerce to i32
+  %tmp1.0.extract.trunc = trunc i32 %2 to i16
+  %3 = bitcast i16 %tmp1.0.extract.trunc to half
+  %cmp = fcmp fast ogt half %1, %3
+  %cond = select i1 %cmp, half %1, half %3
+  %4 = bitcast half %cond to i16
+  %tmp4.0.insert.ext = zext i16 %4 to i32
+  %5 = bitcast i32 %tmp4.0.insert.ext to float
+  ret float %5
+
+; CHECK-FM-LABEL: vmaxnm:
+; CHECK-FM:       vmaxnm.f16  s0, s0, s1
+}
+
 ; 18. VMINNM
+define dso_local float @vminnm(float %a.coerce, float %b.coerce) {
+entry:
+  %0 = bitcast float %a.coerce to i32
+  %tmp.0.extract.trunc = trunc i32 %0 to i16
+  %1 = bitcast i16 %tmp.0.extract.trunc to half
+  %2 = bitcast float %b.coerce to i32
+  %tmp1.0.extract.trunc = trunc i32 %2 to i16
+  %3 = bitcast i16 %tmp1.0.extract.trunc to half
+  %cmp = fcmp fast ogt half %1, %3
+  %cond = select i1 %cmp, half %3, half %1
+  %4 = bitcast half %cond to i16
+  %tmp4.0.insert.ext = zext i16 %4 to i32
+  %5 = bitcast i32 %tmp4.0.insert.ext to float
+  ret float %5
+
+; CHECK-FM-LABEL: vminnm:
+; CHECK-FM:       vminnm.f16  s0, s0, s1
+}
 
 ; 19. VMLA
 define float @VMLA(float %a.coerce, float %b.coerce, float %c.coerce) {
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -479,9 +479,9 @@
 multiclass vmaxmin_inst<string op, bit opc, SDNode SD> {
   let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
     def H : AHbInp<0b11101, 0b00, opc,
-                   (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                   (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                    NoItinerary, !strconcat(op, ".f16\t$Sd, $Sn, $Sm"),
-                   []>,
+                   [(set HPR:$Sd, (SD HPR:$Sn, HPR:$Sm))]>,
                    Requires<[HasFullFP16]>;
 
     def S : ASbInp<0b11101, 0b00, opc,
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -527,6 +527,9 @@
     setOperationAction(ISD::BITCAST, MVT::i16, Custom);
     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
     setOperationAction(ISD::BITCAST, MVT::f16, Custom);
+
+    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
   }
 
   for (MVT VT : MVT::vector_valuetypes()) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D44675.139094.patch
Type: text/x-patch
Size: 3621 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180320/7dce2192/attachment.bin>