[llvm] r256922 - [X86][SSE] There is no zmm addsubpd/addsubps instruction.

Wed Jan 6 01:08:49 PST 2016

Author: rksimon
Date: Wed Jan  6 03:08:49 2016
New Revision: 256922

URL: http://llvm.org/viewvc/llvm-project?rev=256922&view=rev
Log:
[X86][SSE] There is no zmm addsubpd/addsubps instruction.

Replace the assert in combineShuffleToAddSub with an early out.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/sse3-avx-addsub.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=256922&r1=256921&r2=256922&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jan  6 03:08:49 2016
@@ -23684,9 +23684,13 @@ static SDValue PerformTargetShuffleCombi
 /// the operands which explicitly discard the lanes which are unused by this
 /// operation to try to flow through the rest of the combiner the fact that
 /// they're unused.
-static SDValue combineShuffleToAddSub(SDNode *N, SelectionDAG &DAG) {
+static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget *Subtarget,
+                                      SelectionDAG &DAG) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
+  if ((!Subtarget->hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
+      (!Subtarget->hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)))
+    return SDValue();
 
   // We only handle target-independent shuffles.
   // FIXME: It would be easy and harmless to use the target shuffle mask
@@ -23728,12 +23732,6 @@ static SDValue combineShuffleToAddSub(SD
         isShuffleEquivalent(V1, V2, Mask, {0, 9, 2, 11, 4, 13, 6, 15})))
     return SDValue();
 
-  // Only specific types are legal at this point, assert so we notice if and
-  // when these change.
-  assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v8f32 ||
-          VT == MVT::v4f64) &&
-         "Unknown vector type encountered!");
-
   return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
 }
 
@@ -23753,8 +23751,8 @@ static SDValue PerformShuffleCombine(SDN
 
   // If we have legalized the vector types, look for blends of FADD and FSUB
   // nodes that we can fuse into an ADDSUB node.
-  if (TLI.isTypeLegal(VT) && Subtarget->hasSSE3())
-    if (SDValue AddSub = combineShuffleToAddSub(N, DAG))
+  if (TLI.isTypeLegal(VT))
+    if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
       return AddSub;
 
   // Combine 256-bit vector shuffles. This is only profitable when in AVX mode

Modified: llvm/trunk/test/CodeGen/X86/sse3-avx-addsub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse3-avx-addsub.ll?rev=256922&r1=256921&r2=256922&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse3-avx-addsub.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse3-avx-addsub.ll Wed Jan  6 03:08:49 2016
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+sse3 | FileCheck %s --check-prefix=SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512
 
 ; Test ADDSUB ISel patterns.
 
@@ -101,6 +102,62 @@ define <2 x double> @test4(<2 x double>
   ret <2 x double> %vecinit2
 }
 
+define <16 x float> @test5(<16 x float> %A, <16 x float> %B) {
+; SSE-LABEL: test5:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsubps %xmm4, %xmm0
+; SSE-NEXT:    addsubps %xmm5, %xmm1
+; SSE-NEXT:    addsubps %xmm6, %xmm2
+; SSE-NEXT:    addsubps %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test5:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vaddsubps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddsubps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test5:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vaddps %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vsubps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa32 {{.*#+}} zmm1 = [0,17,2,19,4,21,6,23,8,25,10,27,12,29,14,31]
+; AVX512-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm0
+; AVX512-NEXT:    retq
+  %add = fadd <16 x float> %A, %B
+  %sub = fsub <16 x float> %A, %B
+  %vecinit2 = shufflevector <16 x float> %sub, <16 x float> %add, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
+  ret <16 x float> %vecinit2
+}
+
+define <8 x double> @test6(<8 x double> %A, <8 x double> %B) {
+; SSE-LABEL: test6:
+; SSE:       # BB#0:
+; SSE-NEXT:    addsubpd %xmm4, %xmm0
+; SSE-NEXT:    addsubpd %xmm5, %xmm1
+; SSE-NEXT:    addsubpd %xmm6, %xmm2
+; SSE-NEXT:    addsubpd %xmm7, %xmm3
+; SSE-NEXT:    retq
+;
+; AVX1-LABEL: test6:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vaddsubpd %ymm2, %ymm0, %ymm0
+; AVX1-NEXT:    vaddsubpd %ymm3, %ymm1, %ymm1
+; AVX1-NEXT:    retq
+;
+; AVX512-LABEL: test6:
+; AVX512:       # BB#0:
+; AVX512-NEXT:    vaddpd %zmm1, %zmm0, %zmm2
+; AVX512-NEXT:    vsubpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,9,2,11,4,13,6,15]
+; AVX512-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm0
+; AVX512-NEXT:    retq
+  %add = fadd <8 x double> %A, %B
+  %sub = fsub <8 x double> %A, %B
+  %vecinit2 = shufflevector <8 x double> %sub, <8 x double> %add, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
+  ret <8 x double> %vecinit2
+}
+
 define <4 x float> @test1b(<4 x float> %A, <4 x float>* %B) {
 ; SSE-LABEL: test1b:
 ; SSE:       # BB#0: