[llvm] r240118 - [ARM] Look through concat when lowering in-place shuffles (VZIP, ..)

Ahmed Bougacha ahmed.bougacha at gmail.com
Thu Jun 18 19:32:36 PDT 2015


Author: ab
Date: Thu Jun 18 21:32:35 2015
New Revision: 240118

URL: http://llvm.org/viewvc/llvm-project?rev=240118&view=rev
Log:
[ARM] Look through concat when lowering in-place shuffles (VZIP, ..)

Currently, we canonicalize shuffles that produce a result larger than
their operands with:
  shuffle(concat(v1, undef), concat(v2, undef))
->
  shuffle(concat(v1, v2), undef)

because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).

This is useful in the general case, but there are special cases where
native shuffles produce larger results: the two-result ops.

We can look through the concat when lowering them:
  shuffle(concat(v1, v2), undef)
->
  concat(VZIP(v1, v2):0, :1)

This lets us generate the native shuffles instead of scalarizing to
dozens of VMOVs.

Differential Revision: http://reviews.llvm.org/D10424
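
For illustration, the kind of IR that benefits is a shuffle interleaving two
64-bit vectors into a 128-bit result. A minimal sketch (mirroring the
vzipi8_Qres test updated below; the function name here is illustrative):

  define <16 x i8> @interleave(<8 x i8> %a, <8 x i8> %b) {
    ; Zip mask a0,b0,a1,b1,...,a7,b7: previously scalarized through VMOVs,
    ; now lowered to a single vzip.8 of the two d-registers.
    %r = shufflevector <8 x i8> %a, <8 x i8> %b,
           <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11,
                       i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
    ret <16 x i8> %r
  }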

Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/test/CodeGen/ARM/vtrn.ll
    llvm/trunk/test/CodeGen/ARM/vuzp.ll
    llvm/trunk/test/CodeGen/ARM/vzip.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=240118&r1=240117&r2=240118&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Thu Jun 18 21:32:35 2015
@@ -5715,6 +5715,44 @@ static SDValue LowerVECTOR_SHUFFLE(SDVal
           .getValue(WhichResult);
     }
 
+    // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
+    // shuffles that produce a result larger than their operands with:
+    //   shuffle(concat(v1, undef), concat(v2, undef))
+    // ->
+    //   shuffle(concat(v1, v2), undef)
+    // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
+    //
+    // This is useful in the general case, but there are special cases where
+    // native shuffles produce larger results: the two-result ops.
+    //
+    // Look through the concat when lowering them:
+    //   shuffle(concat(v1, v2), undef)
+    // ->
+    //   concat(VZIP(v1, v2):0, :1)
+    //
+    if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
+        V2->getOpcode() == ISD::UNDEF) {
+      SDValue SubV1 = V1->getOperand(0);
+      SDValue SubV2 = V1->getOperand(1);
+      EVT SubVT = SubV1.getValueType();
+
+      // We expect these to have been canonicalized to -1.
+      assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
+        return i < (int)VT.getVectorNumElements();
+      }) && "Unexpected shuffle index into UNDEF operand!");
+
+      if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
+              ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
+        if (isV_UNDEF)
+          SubV2 = SubV1;
+        assert((WhichResult == 0) &&
+               "In-place shuffle of concat can only have one result!");
+        SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
+                                  SubV1, SubV2);
+        return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
+                           Res.getValue(1));
+      }
+    }
   }
 
   // If the shuffle is not directly supported and it has 4 elements, use

Modified: llvm/trunk/test/CodeGen/ARM/vtrn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vtrn.ll?rev=240118&r1=240117&r2=240118&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vtrn.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vtrn.ll Thu Jun 18 21:32:35 2015
@@ -20,40 +20,9 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8
 define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: vtrni8_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d19, [r0]
-; CHECK-NEXT:    vldr d18, [r1]
-; CHECK-NEXT:    vmov.u8 r0, d19[0]
-; CHECK-NEXT:    vmov.8 d16[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[0]
-; CHECK-NEXT:    vmov.8 d16[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[2]
-; CHECK-NEXT:    vmov.8 d16[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[2]
-; CHECK-NEXT:    vmov.8 d16[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[4]
-; CHECK-NEXT:    vmov.8 d16[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[4]
-; CHECK-NEXT:    vmov.8 d16[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[6]
-; CHECK-NEXT:    vmov.8 d16[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[6]
-; CHECK-NEXT:    vmov.8 d16[7], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[1]
-; CHECK-NEXT:    vmov.8 d17[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[1]
-; CHECK-NEXT:    vmov.8 d17[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[3]
-; CHECK-NEXT:    vmov.8 d17[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[3]
-; CHECK-NEXT:    vmov.8 d17[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[5]
-; CHECK-NEXT:    vmov.8 d17[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[5]
-; CHECK-NEXT:    vmov.8 d17[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[7]
-; CHECK-NEXT:    vmov.8 d17[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[7]
-; CHECK-NEXT:    vmov.8 d17[7], r0
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vtrn.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -83,26 +52,11 @@ define <4 x i16> @vtrni16(<4 x i16>* %A,
 define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK-LABEL: vtrni16_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d16, [r0]
 ; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vmov.u16 r0, d16[0]
-; CHECK-NEXT:    vmov.16 d18[0], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[0]
-; CHECK-NEXT:    vmov.16 d18[1], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[2]
-; CHECK-NEXT:    vmov.16 d18[2], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[2]
-; CHECK-NEXT:    vmov.16 d18[3], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[1]
-; CHECK-NEXT:    vmov.16 d19[0], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[1]
-; CHECK-NEXT:    vmov.16 d19[1], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[3]
-; CHECK-NEXT:    vmov.16 d19[2], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[3]
-; CHECK-NEXT:    vmov.16 d19[3], r0
-; CHECK-NEXT:    vmov r0, r1, d18
-; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vtrn.16 d16, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
@@ -132,8 +86,7 @@ define <4 x i32> @vtrni32_Qres(<2 x i32>
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vldr d17, [r1]
 ; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vrev64.32 q9, q8
-; CHECK-NEXT:    vuzp.32 q8, q9
+; CHECK-NEXT:    vtrn.32 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -165,8 +118,7 @@ define <4 x float> @vtrnf_Qres(<2 x floa
 ; CHECK:       @ BB#0:
 ; CHECK-NEXT:    vldr d17, [r1]
 ; CHECK-NEXT:    vldr d16, [r0]
-; CHECK-NEXT:    vrev64.32 q9, q8
-; CHECK-NEXT:    vuzp.32 q8, q9
+; CHECK-NEXT:    vtrn.32 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -329,32 +281,9 @@ define <8 x i8> @vtrni8_undef(<8 x i8>*
 define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: vtrni8_undef_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d18, [r0]
-; CHECK-NEXT:    vldr d19, [r1]
-; CHECK-NEXT:    vmov.u8 r0, d18[0]
-; CHECK-NEXT:    vmov.8 d16[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[2]
-; CHECK-NEXT:    vmov.8 d16[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[2]
-; CHECK-NEXT:    vmov.8 d16[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[4]
-; CHECK-NEXT:    vmov.8 d16[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[6]
-; CHECK-NEXT:    vmov.8 d16[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[6]
-; CHECK-NEXT:    vmov.8 d16[7], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[1]
-; CHECK-NEXT:    vmov.8 d17[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[1]
-; CHECK-NEXT:    vmov.8 d17[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[3]
-; CHECK-NEXT:    vmov.8 d17[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[3]
-; CHECK-NEXT:    vmov.8 d17[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[5]
-; CHECK-NEXT:    vmov.8 d17[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[7]
-; CHECK-NEXT:    vmov.8 d17[7], r0
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vtrn.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr

Modified: llvm/trunk/test/CodeGen/ARM/vuzp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vuzp.ll?rev=240118&r1=240117&r2=240118&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vuzp.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vuzp.ll Thu Jun 18 21:32:35 2015
@@ -20,40 +20,9 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8
 define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: vuzpi8_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d19, [r0]
-; CHECK-NEXT:    vldr d18, [r1]
-; CHECK-NEXT:    vmov.u8 r0, d19[0]
-; CHECK-NEXT:    vmov.8 d16[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[2]
-; CHECK-NEXT:    vmov.8 d16[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[4]
-; CHECK-NEXT:    vmov.8 d16[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[6]
-; CHECK-NEXT:    vmov.8 d16[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[0]
-; CHECK-NEXT:    vmov.8 d16[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[2]
-; CHECK-NEXT:    vmov.8 d16[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[4]
-; CHECK-NEXT:    vmov.8 d16[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[6]
-; CHECK-NEXT:    vmov.8 d16[7], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[1]
-; CHECK-NEXT:    vmov.8 d17[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[3]
-; CHECK-NEXT:    vmov.8 d17[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[5]
-; CHECK-NEXT:    vmov.8 d17[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[7]
-; CHECK-NEXT:    vmov.8 d17[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[1]
-; CHECK-NEXT:    vmov.8 d17[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[3]
-; CHECK-NEXT:    vmov.8 d17[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[5]
-; CHECK-NEXT:    vmov.8 d17[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[7]
-; CHECK-NEXT:    vmov.8 d17[7], r0
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vuzp.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -83,26 +52,11 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A,
 define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK-LABEL: vuzpi16_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d16, [r0]
 ; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vmov.u16 r0, d16[0]
-; CHECK-NEXT:    vmov.16 d18[0], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[2]
-; CHECK-NEXT:    vmov.16 d18[1], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[0]
-; CHECK-NEXT:    vmov.16 d18[2], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[2]
-; CHECK-NEXT:    vmov.16 d18[3], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[1]
-; CHECK-NEXT:    vmov.16 d19[0], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[3]
-; CHECK-NEXT:    vmov.16 d19[1], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[1]
-; CHECK-NEXT:    vmov.16 d19[2], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[3]
-; CHECK-NEXT:    vmov.16 d19[3], r0
-; CHECK-NEXT:    vmov r0, r1, d18
-; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vuzp.16 d16, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
@@ -266,32 +220,9 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>*
 define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: vuzpi8_undef_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d18, [r0]
-; CHECK-NEXT:    vldr d19, [r1]
-; CHECK-NEXT:    vmov.u8 r0, d18[0]
-; CHECK-NEXT:    vmov.8 d16[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[2]
-; CHECK-NEXT:    vmov.8 d16[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[0]
-; CHECK-NEXT:    vmov.8 d16[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[2]
-; CHECK-NEXT:    vmov.8 d16[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[4]
-; CHECK-NEXT:    vmov.8 d16[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[6]
-; CHECK-NEXT:    vmov.8 d16[7], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[1]
-; CHECK-NEXT:    vmov.8 d17[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[3]
-; CHECK-NEXT:    vmov.8 d17[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[5]
-; CHECK-NEXT:    vmov.8 d17[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[7]
-; CHECK-NEXT:    vmov.8 d17[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[5]
-; CHECK-NEXT:    vmov.8 d17[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[7]
-; CHECK-NEXT:    vmov.8 d17[7], r0
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vuzp.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr

Modified: llvm/trunk/test/CodeGen/ARM/vzip.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vzip.ll?rev=240118&r1=240117&r2=240118&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vzip.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vzip.ll Thu Jun 18 21:32:35 2015
@@ -20,40 +20,9 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8
 define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: vzipi8_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d19, [r0]
-; CHECK-NEXT:    vldr d18, [r1]
-; CHECK-NEXT:    vmov.u8 r0, d19[0]
-; CHECK-NEXT:    vmov.8 d16[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[0]
-; CHECK-NEXT:    vmov.8 d16[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[1]
-; CHECK-NEXT:    vmov.8 d16[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[1]
-; CHECK-NEXT:    vmov.8 d16[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[2]
-; CHECK-NEXT:    vmov.8 d16[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[2]
-; CHECK-NEXT:    vmov.8 d16[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[3]
-; CHECK-NEXT:    vmov.8 d16[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[3]
-; CHECK-NEXT:    vmov.8 d16[7], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[4]
-; CHECK-NEXT:    vmov.8 d17[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[4]
-; CHECK-NEXT:    vmov.8 d17[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[5]
-; CHECK-NEXT:    vmov.8 d17[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[5]
-; CHECK-NEXT:    vmov.8 d17[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[6]
-; CHECK-NEXT:    vmov.8 d17[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[6]
-; CHECK-NEXT:    vmov.8 d17[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[7]
-; CHECK-NEXT:    vmov.8 d17[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[7]
-; CHECK-NEXT:    vmov.8 d17[7], r0
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vzip.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
@@ -83,26 +52,11 @@ define <4 x i16> @vzipi16(<4 x i16>* %A,
 define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK-LABEL: vzipi16_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d16, [r0]
 ; CHECK-NEXT:    vldr d17, [r1]
-; CHECK-NEXT:    vmov.u16 r0, d16[0]
-; CHECK-NEXT:    vmov.16 d18[0], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[0]
-; CHECK-NEXT:    vmov.16 d18[1], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[1]
-; CHECK-NEXT:    vmov.16 d18[2], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[1]
-; CHECK-NEXT:    vmov.16 d18[3], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[2]
-; CHECK-NEXT:    vmov.16 d19[0], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[2]
-; CHECK-NEXT:    vmov.16 d19[1], r0
-; CHECK-NEXT:    vmov.u16 r0, d16[3]
-; CHECK-NEXT:    vmov.16 d19[2], r0
-; CHECK-NEXT:    vmov.u16 r0, d17[3]
-; CHECK-NEXT:    vmov.16 d19[3], r0
-; CHECK-NEXT:    vmov r0, r1, d18
-; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vzip.16 d16, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
@@ -266,32 +220,9 @@ define <8 x i8> @vzipi8_undef(<8 x i8>*
 define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ; CHECK-LABEL: vzipi8_undef_Qres:
 ; CHECK:       @ BB#0:
-; CHECK-NEXT:    vldr d18, [r0]
-; CHECK-NEXT:    vldr d19, [r1]
-; CHECK-NEXT:    vmov.u8 r0, d18[0]
-; CHECK-NEXT:    vmov.8 d16[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[1]
-; CHECK-NEXT:    vmov.8 d16[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[1]
-; CHECK-NEXT:    vmov.8 d16[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[2]
-; CHECK-NEXT:    vmov.8 d16[5], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[3]
-; CHECK-NEXT:    vmov.8 d16[6], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[3]
-; CHECK-NEXT:    vmov.8 d16[7], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[4]
-; CHECK-NEXT:    vmov.8 d17[0], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[4]
-; CHECK-NEXT:    vmov.8 d17[1], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[5]
-; CHECK-NEXT:    vmov.8 d17[2], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[5]
-; CHECK-NEXT:    vmov.8 d17[3], r0
-; CHECK-NEXT:    vmov.u8 r0, d18[6]
-; CHECK-NEXT:    vmov.8 d17[4], r0
-; CHECK-NEXT:    vmov.u8 r0, d19[7]
-; CHECK-NEXT:    vmov.8 d17[7], r0
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d16, [r0]
+; CHECK-NEXT:    vzip.8 d16, d17
 ; CHECK-NEXT:    vmov r0, r1, d16
 ; CHECK-NEXT:    vmov r2, r3, d17
 ; CHECK-NEXT:    mov pc, lr
