[llvm] 9afa681 - [X86] lowerShuffleAsRepeatedMaskAndLanePermute - allow 64-bit sublane shuffling on AVX512BW v64i8 shuffles
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 1 08:40:57 PDT 2022
Author: Simon Pilgrim
Date: 2022-04-01T16:40:10+01:00
New Revision: 9afa6811ad9ea9ef3fccafb81f0f5ac48bf27927
URL: https://github.com/llvm/llvm-project/commit/9afa6811ad9ea9ef3fccafb81f0f5ac48bf27927
DIFF: https://github.com/llvm/llvm-project/commit/9afa6811ad9ea9ef3fccafb81f0f5ac48bf27927.diff
LOG: [X86] lowerShuffleAsRepeatedMaskAndLanePermute - allow 64-bit sublane shuffling on AVX512BW v64i8 shuffles
Previously, 64-bit sub-lane permutes were only performed for 256-bit vectors on AVX2 targets.
Noticed while triaging Issue #54658.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3661de89e8070..4fe1d72127927 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17277,8 +17277,13 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
return SDValue();
// On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
- // (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes.
- int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1;
+ // (with PERMQ/PERMPD). On AVX512BW targets, permuting 64-bit sub-lanes, even
+ // with a variable shuffle, is worth it for 64xi8 vectors. Otherwise we can
+ // only permute whole 128-bit lanes.
+ int SubLaneScale = 1;
+ if ((Subtarget.hasAVX2() && VT.is256BitVector()) ||
+ (Subtarget.hasBWI() && VT == MVT::v64i8))
+ SubLaneScale = 2;
int NumSubLanes = NumLanes * SubLaneScale;
int NumSubLaneElts = NumLaneElts / SubLaneScale;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
index 7b2217925faea..92fede6ec0abd 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v64.ll
@@ -815,19 +815,9 @@ define <64 x i8> @shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
; AVX512BW-NEXT: vpsrlw $8, %zmm1, %zmm1
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm0[2,3,6,7],zmm1[2,3,6,7]
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm2, %ymm3
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm4 = <u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14>
-; AVX512BW-NEXT: vpshufb %ymm4, %ymm3, %ymm3
-; AVX512BW-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[0,1,4,5]
-; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm5 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX512BW-NEXT: vpshufb %ymm5, %ymm1, %ymm1
-; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3],ymm1[4,5],ymm3[6,7]
-; AVX512BW-NEXT: vpshufb %ymm4, %ymm2, %ymm2
-; AVX512BW-NEXT: vpshufb %ymm5, %ymm0, %ymm0
-; AVX512BW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3],ymm0[4,5],ymm2[6,7]
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpackuswb %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,4,6,1,3,5,7]
+; AVX512BW-NEXT: vpermq %zmm0, %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: shuffle_v64i8_shift_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62_64_66_68_70_72_74_76_78_80_82_84_86_88_90_92_94_96_98_100_102_104_106_108_110_112_114_116_118_120_122_124_126:
More information about the llvm-commits mailing list