[llvm] r347143 - [X86] Add support for matching PACKUSWB from a v64i8 shuffle.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 17 10:54:43 PST 2018
Author: ctopper
Date: Sat Nov 17 10:54:43 2018
New Revision: 347143
URL: http://llvm.org/viewvc/llvm-project?rev=347143&view=rev
Log:
[X86] Add support for matching PACKUSWB from a v64i8 shuffle.
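
PACKUSWB packs 16-bit elements to 8-bit elements with unsigned saturation, operating on each 128-bit lane independently, so a v64i8 shuffle that selects the low byte of every word, where the discarded odd bytes are known zero, can now lower to a single vpackuswb. A minimal IR sketch of the pattern, mirroring the updated test case below (the function name is illustrative):

define <64 x i8> @pack_high_bytes(<32 x i16> %a0, <32 x i16> %a1) {
  ; Shift each word right by 8 so its low byte holds the original high byte
  ; and its high byte is known zero.
  %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ; Reinterpret the words as bytes.
  %3 = bitcast <32 x i16> %1 to <64 x i8>
  %4 = bitcast <32 x i16> %2 to <64 x i8>
  ; Select the even (nonzero) bytes in the lane-interleaved order PACKUSWB
  ; produces: 8 bytes from a 128-bit lane of %3, then 8 from the same lane of %4.
  %5 = shufflevector <64 x i8> %3, <64 x i8> %4, <64 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 64, i32 66, i32 68, i32 70, i32 72, i32 74, i32 76, i32 78, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30, i32 80, i32 82, i32 84, i32 86, i32 88, i32 90, i32 92, i32 94, i32 32, i32 34, i32 36, i32 38, i32 40, i32 42, i32 44, i32 46, i32 96, i32 98, i32 100, i32 102, i32 104, i32 106, i32 108, i32 110, i32 48, i32 50, i32 52, i32 54, i32 56, i32 58, i32 60, i32 62, i32 112, i32 114, i32 116, i32 118, i32 120, i32 122, i32 124, i32 126>
  ret <64 x i8> %5
}

With AVX512BW this now compiles to two vpsrlw $8 instructions followed by one vpackuswb %zmm1, %zmm0, %zmm0, instead of splitting into ymm halves (or, with VBMI, materializing a vpermi2b index vector), as the test diff below shows.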
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347143&r1=347142&r2=347143&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 17 10:54:43 2018
@@ -15422,6 +15422,11 @@ static SDValue lowerV64I8VectorShuffle(c
           lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
     return V;
 
+  // Use dedicated pack instructions for masks that match their pattern.
+  if (SDValue V = lowerVectorShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG,
+                                             Subtarget))
+    return V;
+
   // Try to use shift instructions.
   if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
                                                 Zeroable, Subtarget, DAG))
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll?rev=347143&r1=347142&r2=347143&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v64.ll Sat Nov 17 10:54:43 2018
@@ -590,11 +590,7 @@ define <64 x i8> @shuffle_v64i8_shift_00
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm1, %zmm1
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512BW-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
-; AVX512BW-NEXT:    vpackuswb %ymm2, %ymm3, %ymm2
-; AVX512BW-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
@@ -609,10 +605,9 @@ define <64 x i8> @shuffle_v64i8_shift_00
 ;
 ; AVX512VBMI-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_64_66_68_70_72_74_76_78_16_18_20_22_24_26_28_30_80_82_84_86_88_90_92_94_32_34_36_38_40_42_44_46_96_98_100_102_104_106_108_110_48_50_52_54_56_58_60_62_112_114_116_118_120_122_124_126:
 ; AVX512VBMI: # %bb.0:
-; AVX512VBMI-NEXT:    vpsrlw $8, %zmm0, %zmm2
+; AVX512VBMI-NEXT:    vpsrlw $8, %zmm0, %zmm0
 ; AVX512VBMI-NEXT:    vpsrlw $8, %zmm1, %zmm1
-; AVX512VBMI-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,2,4,6,8,10,12,14,64,66,68,70,72,74,76,78,16,18,20,22,24,26,28,30,80,82,84,86,88,90,92,94,32,34,36,38,40,42,44,46,96,98,100,102,104,106,108,110,48,50,52,54,56,58,60,62,112,114,116,118,120,122,124,126]
-; AVX512VBMI-NEXT:    vpermi2b %zmm1, %zmm2, %zmm0
+; AVX512VBMI-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0
 ; AVX512VBMI-NEXT:    retq
   %1 = lshr <32 x i16> %a0, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
   %2 = lshr <32 x i16> %a1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
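
For reference, the CHECK lines above correspond to the test's AVX512BW and AVX512VBMI run configurations; assuming a compiler containing this revision, running llc on the sketch above with -mtriple=x86_64-unknown-unknown -mattr=+avx512bw should print the single vpackuswb %zmm1, %zmm0, %zmm0 shown in the new checks (the exact triple and attribute spelling follow the test's RUN lines, which are outside this hunk).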