[PATCH] D143789: [X86] Widen i16 shuffle masks if vector width < 512 even with BWI
Noah Goldstein via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 22 09:15:19 PST 2023
goldstein.w.n updated this revision to Diff 499548.
goldstein.w.n added a comment.
Rebase
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D143789/new/
https://reviews.llvm.org/D143789
Files:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/combine-sdiv.ll
llvm/test/CodeGen/X86/shuffle-blendw.ll
Index: llvm/test/CodeGen/X86/shuffle-blendw.ll
===================================================================
--- llvm/test/CodeGen/X86/shuffle-blendw.ll
+++ llvm/test/CodeGen/X86/shuffle-blendw.ll
@@ -66,13 +66,13 @@
; X86-AVX512-LABEL: blendw_to_blendd_32:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vpaddw %ymm2, %ymm0, %ymm0
-; X86-AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7],ymm0[8,9],ymm1[10,11],ymm0[12,13],ymm1[14,15]
+; X86-AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: blendw_to_blendd_32:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpaddw %ymm2, %ymm0, %ymm0
-; X64-AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7],ymm0[8,9],ymm1[10,11],ymm0[12,13],ymm1[14,15]
+; X64-AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; X64-AVX512-NEXT: retq
%x1 = add <16 x i16> %x, %z
%shuffle = shufflevector <16 x i16> %x1, <16 x i16> %y, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 4, i32 5, i32 22, i32 23, i32 8, i32 9, i32 26, i32 27, i32 12, i32 13, i32 30, i32 31>
@@ -119,13 +119,13 @@
; X86-AVX512-LABEL: blendw_to_blendd_16:
; X86-AVX512: # %bb.0:
; X86-AVX512-NEXT: vpaddw %xmm2, %xmm0, %xmm0
-; X86-AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X86-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X86-AVX512-NEXT: retl
;
; X64-AVX512-LABEL: blendw_to_blendd_16:
; X64-AVX512: # %bb.0:
; X64-AVX512-NEXT: vpaddw %xmm2, %xmm0, %xmm0
-; X64-AVX512-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; X64-AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X64-AVX512-NEXT: retq
%x1 = add <8 x i16> %x, %z
%shuffle = shufflevector <8 x i16> %x1, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
Index: llvm/test/CodeGen/X86/combine-sdiv.ll
===================================================================
--- llvm/test/CodeGen/X86/combine-sdiv.ll
+++ llvm/test/CodeGen/X86/combine-sdiv.ll
@@ -2889,26 +2889,12 @@
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
; AVX1-NEXT: retq
;
-; AVX2-LABEL: combine_vec_sdiv_nonuniform7:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpsubw %xmm0, %xmm1, %xmm1
-; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: combine_vec_sdiv_nonuniform7:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpsubw %xmm0, %xmm1, %xmm1
-; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: combine_vec_sdiv_nonuniform7:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512BW-NEXT: vpsubw %xmm0, %xmm1, %xmm1
-; AVX512BW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3],xmm0[4,5,6,7]
-; AVX512BW-NEXT: retq
+; AVX2ORLATER-LABEL: combine_vec_sdiv_nonuniform7:
+; AVX2ORLATER: # %bb.0:
+; AVX2ORLATER-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX2ORLATER-NEXT: vpsubw %xmm0, %xmm1, %xmm1
+; AVX2ORLATER-NEXT: vpblendd {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
+; AVX2ORLATER-NEXT: retq
;
; XOP-LABEL: combine_vec_sdiv_nonuniform7:
; XOP: # %bb.0:
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -19925,9 +19925,10 @@
if ((VT == MVT::i16 || VT == MVT::i8) && !Subtarget.hasBWI())
return false;
- // i8 is better to be widen to i16, because there is PBLENDW for vXi16
- // when the vector bit size is 128 or 256.
- if (VT == MVT::i8 && V1.getSimpleValueType().getSizeInBits() < 512)
+ // If vec width < 512, widen i8/i16 even with BWI as blendd/blendps/blendpd
+ // are preferable to blendw/blendvb/masked-mov.
+ if ((VT == MVT::i16 || VT == MVT::i8) &&
+ V1.getSimpleValueType().getSizeInBits() < 512)
return false;
auto HasMaskOperation = [&](SDValue V) {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D143789.499548.patch
Type: text/x-patch
Size: 4255 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230222/64f1ec46/attachment.bin>
More information about the llvm-commits mailing list