[PATCH] D50454: [ARM] FP16: codegen support for VTRN

Sjoerd Meijer via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 8 08:59:35 PDT 2018


SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: samparker, olista01, john.brawn, efriedma.
Herald added a reviewer: javed.absar.
Herald added subscribers: chrib, kristof.beyls.

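This patch adds instruction selection for VTRN on the FP16 vector types: v4f16 and v8f16 now select VTRNd16 and VTRNq16, the same opcodes used for v4i16 and v8i16. It also enables the previously commented-out test_vtrn_f16 and test_vtrnq_f16 tests. This should be the last patch needed to complete the FP16 vector intrinsics/instructions support (and finish PR38404).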


https://reviews.llvm.org/D50454

Files:
  lib/Target/ARM/ARMISelDAGToDAG.cpp
  test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll


Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
===================================================================
--- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -1101,25 +1101,29 @@
   ret %struct.float16x8x2_t %.fca.0.1.insert
 }
 
-; FIXME (PR38404)
-;
-;define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-;  %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-;  %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-;  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
-;  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
-;  ret %struct.float16x4x2_t %.fca.0.1.insert
-;}
-;
-;define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-;  %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-;  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
-;  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
-;  ret %struct.float16x8x2_t %.fca.0.1.insert
-;}
+define dso_local %struct.float16x4x2_t @test_vtrn_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vtrn_f16:
+; CHECK:         vtrn.16 d0, d1
+; CHECK-NEXT:    bx lr
+entry:
+  %vtrn.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
+  %vtrn1.i = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
+  %.fca.0.0.insert = insertvalue %struct.float16x4x2_t undef, <4 x half> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float16x4x2_t %.fca.0.0.insert, <4 x half> %vtrn1.i, 0, 1
+  ret %struct.float16x4x2_t %.fca.0.1.insert
+}
+
+define dso_local %struct.float16x8x2_t @test_vtrnq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vtrnq_f16:
+; CHECK:         vtrn.16 q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %vtrn.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+  %vtrn1.i = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+  %.fca.0.0.insert = insertvalue %struct.float16x8x2_t undef, <8 x half> %vtrn.i, 0, 0
+  %.fca.0.1.insert = insertvalue %struct.float16x8x2_t %.fca.0.0.insert, <8 x half> %vtrn1.i, 0, 1
+  ret %struct.float16x8x2_t %.fca.0.1.insert
+}
 
 define dso_local <4 x half> @test_vmov_n_f16(float %a.coerce) {
 ; CHECK-LABEL: test_vmov_n_f16:
Index: lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3076,10 +3076,12 @@
     switch (VT.getSimpleVT().SimpleTy) {
     default: return;
     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
+    case MVT::v4f16:
     case MVT::v4i16: Opc = ARM::VTRNd16; break;
     case MVT::v2f32:
     case MVT::v2i32: Opc = ARM::VTRNd32; break;
     case MVT::v16i8: Opc = ARM::VTRNq8; break;
+    case MVT::v8f16:
     case MVT::v8i16: Opc = ARM::VTRNq16; break;
     case MVT::v4f32:
     case MVT::v4i32: Opc = ARM::VTRNq32; break;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D50454.159735.patch
Type: text/x-patch
Size: 3516 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180808/84662b13/attachment.bin>


More information about the llvm-commits mailing list