[llvm] r326811 - [AArch64] define isExtractSubvectorCheap

Sebastian Pop via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 6 08:54:55 PST 2018


Author: spop
Date: Tue Mar  6 08:54:55 2018
New Revision: 326811

URL: http://llvm.org/viewvc/llvm-project?rev=326811&view=rev
Log:
[AArch64] define isExtractSubvectorCheap

Following the ARM NEON backend, define isExtractSubvectorCheap to return true
when extracting the low or high part of a NEON register.
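
As an illustration (a minimal sketch, not part of the patch; the function
names are made up), the two shuffles below select the low and high halves of
a v8i16 and should be recognized as EXTRACT_SUBVECTOR with Index 0 and
Index 4, which are exactly the two cases the new hook reports as cheap:

  ; Illustrative only: @low_half and @high_half are hypothetical.
  define <4 x i16> @low_half(<8 x i16> %v) {
    %lo = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    ret <4 x i16> %lo
  }
  define <4 x i16> @high_half(<8 x i16> %v) {
    %hi = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    ret <4 x i16> %hi
  }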

The patch disables a test in llvm/test/CodeGen/AArch64/arm64-ext.ll. This
testcase is fragile in the sense that it requires a BUILD_VECTOR to "survive"
all DAG transforms until ISelLowering. The testcase is supposed to check that
AArch64TargetLowering::ReconstructShuffle() works, and for that we need a
BUILD_VECTOR in ISelLowering. As we now transform the BUILD_VECTOR earlier into
a VEXT + vector_shuffle, we no longer have the BUILD_VECTOR pattern by the time
we reach ISelLowering. As there is no way to disable the combiner so that only
the code in ISelLowering is exercised, the patch disables the testcase.
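
For reference, the shuffle in the disabled test (removed in the arm64-ext.ll
hunk below) interleaves both sources, so that one source needs a vext while
the other can be handled as a cheap subvector extract:

  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>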

Differential revision: https://reviews.llvm.org/D43973

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll
    llvm/trunk/test/CodeGen/AArch64/arm64-ext.ll
    llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=326811&r1=326810&r2=326811&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Mar  6 08:54:55 2018
@@ -8200,6 +8200,14 @@ bool AArch64TargetLowering::shouldConver
   return Shift < 3;
 }
 
+bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+                                                    unsigned Index) const {
+  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
+    return false;
+
+  return (Index == 0 || Index == ResVT.getVectorNumElements());
+}
+
 /// Turn vector tests of the signbit in the form of:
 ///   xor (sra X, elt_size(X)-1), -1
 /// into:

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=326811&r1=326810&r2=326811&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Tue Mar  6 08:54:55 2018
@@ -365,6 +365,11 @@ public:
   bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                          Type *Ty) const override;
 
+  /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
+  /// with this index.
+  bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
+                               unsigned Index) const override;
+
   Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                         AtomicOrdering Ord) const override;
   Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,

Modified: llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll?rev=326811&r1=326810&r2=326811&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-vuzp.ll Tue Mar  6 08:54:55 2018
@@ -4,7 +4,6 @@ declare <16 x i8> @llvm.aarch64.neon.tbl
 
 ; CHECK-LABEL: fun1:
 ; CHECK: uzp1 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NOT: mov
 define i32 @fun1() {
 entry:
   %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
@@ -16,7 +15,6 @@ entry:
 
 ; CHECK-LABEL: fun2:
 ; CHECK: uzp2 {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-; CHECK-NOT: mov
 define i32 @fun2() {
 entry:
   %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
@@ -28,7 +26,6 @@ entry:
 
 ; CHECK-LABEL: fun3:
 ; CHECK-NOT: uzp1
-; CHECK: mov
 define i32 @fun3() {
 entry:
   %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)
@@ -40,7 +37,6 @@ entry:
 
 ; CHECK-LABEL: fun4:
 ; CHECK-NOT: uzp2
-; CHECK: mov
 define i32 @fun4() {
 entry:
   %vtbl1.i.1 = tail call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> <i8 0, i8 16, i8 19, i8 4, i8 -65, i8 -65, i8 -71, i8 -71, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> undef)

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-ext.ll?rev=326811&r1=326810&r2=326811&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-ext.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-ext.ll Tue Mar  6 08:54:55 2018
@@ -94,19 +94,6 @@ define <8 x i16> @test_vextRq_undef2(<8
 ; Tests for ReconstructShuffle function. Indices have to be carefully
 ; chosen to reach lowering phase as a BUILD_VECTOR.
 
-; One vector needs vext, the other can be handled by extract_subvector
-; Also checks interleaving of sources is handled correctly.
-; Essence: a vext is used on %A and something saner than stack load/store for final result.
-define <4 x i16> @test_interleaved(<8 x i16>* %A, <8 x i16>* %B) nounwind {
-;CHECK-LABEL: test_interleaved:
-;CHECK: ext.8b
-;CHECK: zip1.4h
-        %tmp1 = load <8 x i16>, <8 x i16>* %A
-        %tmp2 = load <8 x i16>, <8 x i16>* %B
-        %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <4 x i32> <i32 3, i32 8, i32 5, i32 9>
-        ret <4 x i16> %tmp3
-}
-
 ; An undef in the shuffle list should still be optimizable
 define <4 x i16> @test_undef(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK-LABEL: test_undef:

Modified: llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll?rev=326811&r1=326810&r2=326811&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-scalar-copy.ll Tue Mar  6 08:54:55 2018
@@ -79,7 +79,7 @@ define half @test_dup_hv8H_0(<8 x half>
 
 define <1 x i8> @test_vector_dup_bv16B(<16 x i8> %v1) #0 {
  ; CHECK-LABEL: test_vector_dup_bv16B:
- ; CHECK-NEXT: dup v0.16b, v0.b[14]
+ ; CHECK-NEXT: dup v0.8b, v0.b[14]
  ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <16 x i8> %v1, <16 x i8> undef, <1 x i32> <i32 14> 
  ret <1 x i8> %shuffle.i
@@ -95,7 +95,7 @@ define <1 x i8> @test_vector_dup_bv8B(<8
 
 define <1 x i16> @test_vector_dup_hv8H(<8 x i16> %v1) #0 {
  ; CHECK-LABEL: test_vector_dup_hv8H:
- ; CHECK-NEXT:	dup v0.8h, v0.h[7]
+ ; CHECK-NEXT:	dup v0.4h, v0.h[7]
  ; CHECK-NEXT: ret
  %shuffle.i = shufflevector <8 x i16> %v1, <8 x i16> undef, <1 x i32> <i32 7> 
  ret <1 x i16> %shuffle.i
@@ -111,7 +111,7 @@ define <1 x i16> @test_vector_dup_hv4H(<
 
 define <1 x i32> @test_vector_dup_sv4S(<4 x i32> %v1) #0 {
  ; CHECK-LABEL: test_vector_dup_sv4S:
- ; CHECK-NEXT: dup v0.4s, v0.s[3]
+ ; CHECK-NEXT: dup v0.2s, v0.s[3]
  ; CHECK-NEXT: ret
  %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <1 x i32> <i32 3> 
  ret <1 x i32> %shuffle
@@ -135,7 +135,7 @@ define <1 x i64> @test_vector_dup_dv2D(<
 
 define <1 x i64> @test_vector_copy_dup_dv2D(<1 x i64> %a, <2 x i64> %c) #0 {
   ; CHECK-LABEL: test_vector_copy_dup_dv2D:
-  ; CHECK-NEXT: dup v0.2d, v1.d[1]
+  ; CHECK-NEXT: ext {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, #8
   ; CHECK-NEXT: ret
   %vget_lane = extractelement <2 x i64> %c, i32 1
   %vset_lane = insertelement <1 x i64> undef, i64 %vget_lane, i32 0



