[llvm] r339241 - [ARM] FP16: codegen support for VEXT

Wed Aug 8 06:26:38 PDT 2018

Author: sjoerdmeijer
Date: Wed Aug  8 06:26:38 2018
New Revision: 339241

URL: http://llvm.org/viewvc/llvm-project?rev=339241&view=rev
Log:
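[ARM] FP16: codegen support for VEXT

Add instruction-selection patterns that map NEONvext on v4f16 and v8f16
operands to the existing VEXTd16 and VEXTq16 instructions, and reformat the
v2f32/v4f32 NEONvext patterns onto a single line. The test_vext_f16 and
test_vextq_f16 tests, previously commented out under the PR38404 FIXME, are
enabled with CHECK lines for the expected vext.16 output.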

Differential Revision: https://reviews.llvm.org/D50427

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
    llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=339241&r1=339240&r2=339241&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Aug  8 06:26:38 2018
@@ -6678,13 +6678,14 @@ def VEXTd16 : VEXTd<"vext", "16", v4i16,
   let Inst{10-9} = index{1-0};
   let Inst{8}    = 0b0;
 }
+def : Pat<(v4f16 (NEONvext (v4f16 DPR:$Vn), (v4f16 DPR:$Vm), (i32 imm:$index))),
+          (VEXTd16 DPR:$Vn, DPR:$Vm, imm:$index)>;
+
 def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> {
   let Inst{10}     = index{0};
   let Inst{9-8}    = 0b00;
 }
-def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
-                           (v2f32 DPR:$Vm),
-                           (i32 imm:$index))),
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), (v2f32 DPR:$Vm), (i32 imm:$index))),
           (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
 
 def VEXTq8  : VEXTq<"vext", "8",  v16i8, imm0_15> {
@@ -6694,6 +6695,9 @@ def VEXTq16 : VEXTq<"vext", "16", v8i16,
   let Inst{11-9} = index{2-0};
   let Inst{8}    = 0b0;
 }
+def : Pat<(v8f16 (NEONvext (v8f16 QPR:$Vn), (v8f16 QPR:$Vm), (i32 imm:$index))),
+          (VEXTq16 QPR:$Vn, QPR:$Vm, imm:$index)>;
+
 def VEXTq32 : VEXTq<"vext", "32", v4i32, imm0_3> {
   let Inst{11-10} = index{1-0};
   let Inst{9-8}    = 0b00;
@@ -6702,9 +6706,7 @@ def VEXTq64 : VEXTq<"vext", "64", v2i64,
   let Inst{11} = index{0};
   let Inst{10-8}    = 0b000;
 }
-def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
-                           (v4f32 QPR:$Vm),
-                           (i32 imm:$index))),
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn), (v4f32 QPR:$Vm), (i32 imm:$index))),
           (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
 
 //   VTRN     : Vector Transpose

Modified: llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll?rev=339241&r1=339240&r2=339241&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll Wed Aug  8 06:26:38 2018
@@ -1191,20 +1191,26 @@ entry:
   ret <8 x half> %shuffle
 }
 
+define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: test_vext_f16:
+; CHECK:         vext.16 d0, d0, d1, #2
+; CHECK-NEXT:    bx lr
+entry:
+  %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
+  ret <4 x half> %vext
+}
+
+define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test_vextq_f16:
+; CHECK:         vext.16 q0, q0, q1, #5
+; CHECK-NEXT:    bx lr
+entry:
+  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+  ret <8 x half> %vext
+}
+
 ; FIXME (PR38404)
 ;
-;define dso_local <4 x half> @test_vext_f16(<4 x half> %a, <4 x half> %b) {
-;entry:
-;  %vext = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-;  ret <4 x half> %vext
-;}
-;
-;define dso_local <8 x half> @test_vextq_f16(<8 x half> %a, <8 x half> %b) {
-;entry:
-;  %vext = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
-;  ret <8 x half> %vext
-;}
-;
 ;define dso_local <4 x half> @test_vrev64_f16(<4 x half> %a) {
 ;entry:
 ;  %shuffle.i = shufflevector <4 x half> %a, <4 x half> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>