[llvm] r239697 - AVX-512: Implemented DAG lowering for shuff62x2/shufi62x2 instuctions ( Shuffle Packed Values at 128-bit Granularity )
Igor Breger
igor.breger at intel.com
Sun Jun 14 06:07:48 PDT 2015
Author: ibreger
Date: Sun Jun 14 08:07:47 2015
New Revision: 239697
URL: http://llvm.org/viewvc/llvm-project?rev=239697&view=rev
Log:
AVX-512: Implemented DAG lowering for shuff62x2/shufi62x2 instuctions ( Shuffle Packed Values at 128-bit Granularity )
Tests added , vector-shuffle-512-v8.ll test re-generated.
Differential Revision: http://reviews.llvm.org/D10300
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=239697&r1=239696&r2=239697&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jun 14 08:07:47 2015
@@ -9383,6 +9383,30 @@ static SDValue lowerV2X128VectorShuffle(
DAG.getConstant(PermMask, DL, MVT::i8));
}
+/// \brief Handle lowering 4-lane 128-bit shuffles.
+static SDValue lowerV4X128VectorShuffle(SDLoc DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> WidenedMask,
+ SelectionDAG &DAG) {
+
+ assert(WidenedMask.size() == 4 && "Unexpected mask size for 128bit shuffle!");
+ // form a 128-bit permutation.
+ // convert the 64-bit shuffle mask selection values into 128-bit selection
+ // bits defined by a vshuf64x2 instruction's immediate control byte.
+ unsigned PermMask = 0, Imm = 0;
+
+ for (int i = 0, Size = WidenedMask.size(); i < Size; ++i) {
+ if(WidenedMask[i] == SM_SentinelZero)
+ return SDValue();
+
+ // use first element in place of undef musk
+ Imm = (WidenedMask[i] == SM_SentinelUndef) ? 0 : WidenedMask[i];
+ PermMask |= (Imm % 4) << (i * 2);
+ }
+
+ return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
+ DAG.getConstant(PermMask, DL, MVT::i8));
+}
+
/// \brief Lower a vector shuffle by first fixing the 128-bit lanes and then
/// shuffling each lane.
///
@@ -10176,6 +10200,10 @@ static SDValue lowerV8X64VectorShuffle(S
ArrayRef<int> Mask = SVOp->getMask();
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
+ SmallVector<int, 4> WidenedMask;
+ if (canWidenShuffleElements(Mask, WidenedMask))
+ if(SDValue Op = lowerV4X128VectorShuffle(DL, VT, V1, V2, WidenedMask, DAG))
+ return Op;
// X86 has dedicated unpack instructions that can handle specific blend
// operations: UNPCKH and UNPCKL.
if (isShuffleEquivalent(V1, V2, Mask, {0, 8, 2, 10, 4, 12, 6, 14}))
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll?rev=239697&r1=239696&r2=239697&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle.ll Sun Jun 14 08:07:47 2015
@@ -116,10 +116,10 @@ define <16 x i32> @test15(<16 x i32> %a)
ret <16 x i32> %b
}
; CHECK-LABEL: test16
-; CHECK: valignq $2, %zmm0, %zmm1
+; CHECK: valignq $3, %zmm0, %zmm1
; CHECK: ret
define <8 x double> @test16(<8 x double> %a, <8 x double> %b) nounwind {
- %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
+ %c = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10>
ret <8 x double> %c
}
@@ -252,6 +252,62 @@ define <8 x double> @test32(<8 x double>
ret <8 x double> %c
}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind {
+; CHECK-LABEL: test_vshuff64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vshuff64x2 $136, %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1, i32 4, i32 5>
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_vshuff64x2_512_mask(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind {
+; CHECK-LABEL: test_vshuff64x2_512_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm2, %zmm1
+; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
+; CHECK-NEXT: vshuff64x2 $136, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT: retq
+ %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 0, i32 1, i32 4, i32 5>
+ %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer
+ ret <8 x double> %res
+}
+
+define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind {
+; CHECK-LABEL: test_vshufi64x2_512_mask:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpmovsxwq %xmm2, %zmm1
+; CHECK-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
+; CHECK-NEXT: vptestmq %zmm1, %zmm1, %k1
+; CHECK-NEXT: vshufi64x2 $168, %zmm0, %zmm0, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 4, i32 5>
+ %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x
+ ret <8 x i64> %res
+}
+
+define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind {
+; CHECK-LABEL: test_vshuff64x2_512_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vshuff64x2 $40, %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %x1 = load <8 x double>,<8 x double> *%ptr,align 1
+ %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 4, i32 5, i32 0, i32 1>
+ ret <8 x double> %res
+}
+
+define <16 x float> @test_vshuff32x4_512_mem(<16 x float> %x, <16 x float> *%ptr) nounwind {
+; CHECK-LABEL: test_vshuff32x4_512_mem:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vshuff64x2 $20, %zmm0, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %x1 = load <16 x float>,<16 x float> *%ptr,align 1
+ %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x float> %res
+}
+
define <16 x i32> @test_align_v16i32_rr(<16 x i32> %a, <16 x i32> %b) nounwind {
; CHECK-LABEL: test_align_v16i32_rr:
; CHECK: ## BB#0:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=239697&r1=239696&r2=239697&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Sun Jun 14 08:07:47 2015
@@ -88,7 +88,7 @@ define <8 x double> @shuffle_v8f64_70000
define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
; ALL-LABEL: shuffle_v8f64_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vpermpd $68, %zmm0, %zmm0
+; ALL-NEXT: vshuff64x2 $160, %zmm0, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x double> %shuffle
@@ -650,7 +650,7 @@ define <8 x i64> @shuffle_v8i64_70000000
define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
; ALL-LABEL: shuffle_v8i64_01014545:
; ALL: # BB#0:
-; ALL-NEXT: vpermq $68, %zmm0, %zmm0
+; ALL-NEXT: vshufi64x2 $160, %zmm0, %zmm0, %zmm0
; ALL-NEXT: retq
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
ret <8 x i64> %shuffle
More information about the llvm-commits
mailing list