[llvm] r279934 - [X86][AVX512] Only combine EVEX target shuffles to shuffles of the same number of vector elements
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 28 10:27:14 PDT 2016
Author: rksimon
Date: Sun Aug 28 12:27:14 2016
New Revision: 279934
URL: http://llvm.org/viewvc/llvm-project?rev=279934&view=rev
Log:
[X86][AVX512] Only combine EVEX target shuffles to shuffles of the same number of vector elements
Overly eager combining prevents the correct folding of writemasks.
At the moment this occurs for ALL EVEX shuffles; in the future we need to check that the user of the root shuffle is a VSELECT that can fold to a writemask.
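For illustration, a minimal hypothetical IR sketch (not taken from this commit's tests; the function name and shuffle mask are made up) of the fold this change protects, assuming AVX512VL: the VSELECT should fold into the EVEX shuffle instruction's writemask, which is only possible while the combined shuffle keeps the root's 8 x 32-bit element count.

define <8 x i32> @masked_shuffle(<8 x i32> %v, <8 x i32> %passthru, i8 %m) {
  ; Lane-crossing shuffle of a 256-bit vector, EVEX-encoded under AVX512VL.
  %shuf = shufflevector <8 x i32> %v, <8 x i32> undef,
                        <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ; The select is expected to become the shuffle's writemask (e.g. a vpermd with {%k1}),
  ; which requires the combined shuffle to still operate on 8 x 32-bit elements.
  %mask = bitcast i8 %m to <8 x i1>
  %res = select <8 x i1> %mask, <8 x i32> %shuf, <8 x i32> %passthru
  ret <8 x i32> %res
}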
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=279934&r1=279933&r2=279934&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Aug 28 12:27:14 2016
@@ -25287,6 +25287,7 @@ static bool combineX86ShuffleChain(Array
}
unsigned RootSizeInBits = RootVT.getSizeInBits();
+ unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
(RootVT.is256BitVector() && !Subtarget.hasAVX2());
@@ -25297,11 +25298,10 @@ static bool combineX86ShuffleChain(Array
// TODO - this currently prevents all lane shuffles from occurring.
// TODO - check for writemasks usage instead of always preventing combining.
// TODO - attempt to narrow Mask back to writemask size.
- if (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits &&
- (RootSizeInBits == 512 ||
- (Subtarget.hasVLX() && RootSizeInBits >= 128))) {
+ bool IsEVEXShuffle =
+ RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
+ if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
return false;
- }
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
@@ -25370,6 +25370,8 @@ static bool combineX86ShuffleChain(Array
if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -25383,6 +25385,8 @@ static bool combineX86ShuffleChain(Array
ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@@ -25398,6 +25402,8 @@ static bool combineX86ShuffleChain(Array
ShuffleVT)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
@@ -25413,6 +25419,8 @@ static bool combineX86ShuffleChain(Array
Shuffle, ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
V1 = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(V1.getNode());
V2 = DAG.getBitcast(ShuffleVT, V2);
Modified: llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll?rev=279934&r1=279933&r2=279934&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-half-conversions.ll Sun Aug 28 12:27:14 2016
@@ -3241,7 +3241,8 @@ define <8 x i16> @cvt_4f32_to_8i16_zero(
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: addq $24, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
@@ -4148,7 +4149,8 @@ define void @store_cvt_4f32_to_8i16_zero
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
; AVX512VL-NEXT: addq $16, %rsp
; AVX512VL-NEXT: popq %rbx
@@ -5136,7 +5138,8 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: addq $40, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
@@ -5939,7 +5942,8 @@ define void @store_cvt_4f64_to_8i16_zero
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512VL-NEXT: vpxord %xmm1, %xmm1, %xmm1
+; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovdqa32 %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
; AVX512VL-NEXT: popq %rbx