[llvm] be62041 - [X86] matchBinaryShuffle - match PACKUS for v2i64 -> v4i32 shuffle truncation patterns.
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 21 05:38:52 PDT 2023
Author: Simon Pilgrim
Date: 2023-07-21T13:32:04+01:00
New Revision: be62041e7e4e3a77bafc336addc8aa6953ea4d09
URL: https://github.com/llvm/llvm-project/commit/be62041e7e4e3a77bafc336addc8aa6953ea4d09
DIFF: https://github.com/llvm/llvm-project/commit/be62041e7e4e3a77bafc336addc8aa6953ea4d09.diff
LOG: [X86] matchBinaryShuffle - match PACKUS for v2i64 -> v4i32 shuffle truncation patterns.
Handle PACKUSDW on +SSE41 targets, or fall back to PACKUSWB on any +SSE2 target
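For illustration, a minimal sketch of the IR pattern this now matches, reconstructed from the shuffle_lshr_2v2i64 test below (only the shift and CHECK lines appear verbatim in the diff, so the value names and bitcasts here are illustrative):

define <4 x i32> @shuffle_lshr_2v2i64(<2 x i64> %a0, <2 x i64> %a1) {
  ; Shifting right by 63 leaves at most the lowest bit set, so each
  ; i64 lane has at least 63 known leading zero bits.
  %lshr0 = lshr <2 x i64> %a0, <i64 63, i64 63>
  %lshr1 = lshr <2 x i64> %a1, <i64 63, i64 63>
  %bc0 = bitcast <2 x i64> %lshr0 to <4 x i32>
  %bc1 = bitcast <2 x i64> %lshr1 to <4 x i32>
  ; The {0, 2, 4, 6} mask keeps the low i32 of each i64 lane, i.e. a
  ; v2i64 -> v4i32 truncation across both inputs.
  %res = shufflevector <4 x i32> %bc0, <4 x i32> %bc1, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  ret <4 x i32> %res
}

Because the upper bits are known zero, the unsigned saturation in PACKUS can never clamp a value, so the pack is an exact truncation: packusdw on +SSE41, or packuswb on plain +SSE2, where previously the PACKSS lowering required the sign bits to extend into the low bits.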
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/packus.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 44654a804d5505..8a26a0bcc34a46 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -39942,6 +39942,22 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
isTargetShuffleEquivalent(MaskVT, Mask, {0, 2, 4, 6}, DAG) &&
V1.getScalarValueSizeInBits() == 64 &&
V2.getScalarValueSizeInBits() == 64) {
+ // Use (SSE41) PACKUSDW if the leading zero bits cover all but the lowest 16 bits.
+ unsigned MinLZV1 = DAG.computeKnownBits(V1).countMinLeadingZeros();
+ unsigned MinLZV2 = DAG.computeKnownBits(V2).countMinLeadingZeros();
+ if (Subtarget.hasSSE41() && MinLZV1 >= 48 && MinLZV2 >= 48) {
+ SrcVT = MVT::v4i32;
+ DstVT = MVT::v8i16;
+ Shuffle = X86ISD::PACKUS;
+ return true;
+ }
+ // Use PACKUSWB if the leading zero bits cover all but the lowest 8 bits.
+ if (MinLZV1 >= 56 && MinLZV2 >= 56) {
+ SrcVT = MVT::v8i16;
+ DstVT = MVT::v16i8;
+ Shuffle = X86ISD::PACKUS;
+ return true;
+ }
// Use PACKSSDW if the sign bits extend to the lowest 16 bits.
if (DAG.ComputeNumSignBits(V1) > 48 && DAG.ComputeNumSignBits(V2) > 48) {
SrcVT = MVT::v4i32;
diff --git a/llvm/test/CodeGen/X86/packus.ll b/llvm/test/CodeGen/X86/packus.ll
index c1c4cc5b7b0f28..bbc3443db2b030 100644
--- a/llvm/test/CodeGen/X86/packus.ll
+++ b/llvm/test/CodeGen/X86/packus.ll
@@ -245,18 +245,25 @@ define <8 x i16> @shuffle_lshr_2v4i32(<4 x i32> %a0, <4 x i32> %a1) {
}
define <4 x i32> @shuffle_lshr_2v2i64(<2 x i64> %a0, <2 x i64> %a1) {
-; SSE-LABEL: shuffle_lshr_2v2i64:
-; SSE: # %bb.0:
-; SSE-NEXT: psrlq $63, %xmm0
-; SSE-NEXT: psrlq $63, %xmm1
-; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: shuffle_lshr_2v2i64:
+; SSE2: # %bb.0:
+; SSE2-NEXT: psrlq $63, %xmm0
+; SSE2-NEXT: psrlq $63, %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE4-LABEL: shuffle_lshr_2v2i64:
+; SSE4: # %bb.0:
+; SSE4-NEXT: psrlq $63, %xmm0
+; SSE4-NEXT: psrlq $63, %xmm1
+; SSE4-NEXT: packusdw %xmm1, %xmm0
+; SSE4-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: shuffle_lshr_2v2i64:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $63, %xmm1, %xmm1
-; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%lshr0 = lshr <2 x i64> %a0, <i64 63, i64 63>
%lshr1 = lshr <2 x i64> %a1, <i64 63, i64 63>
@@ -267,18 +274,25 @@ define <4 x i32> @shuffle_lshr_2v2i64(<2 x i64> %a0, <2 x i64> %a1) {
}
define <4 x float> @shuffle_lshr_2v2i64_bitcast(<2 x i64> %a0, <2 x i64> %a1) {
-; SSE-LABEL: shuffle_lshr_2v2i64_bitcast:
-; SSE: # %bb.0:
-; SSE-NEXT: psrlq $63, %xmm0
-; SSE-NEXT: psrlq $63, %xmm1
-; SSE-NEXT: packssdw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE2-LABEL: shuffle_lshr_2v2i64_bitcast:
+; SSE2: # %bb.0:
+; SSE2-NEXT: psrlq $63, %xmm0
+; SSE2-NEXT: psrlq $63, %xmm1
+; SSE2-NEXT: packuswb %xmm1, %xmm0
+; SSE2-NEXT: ret{{[l|q]}}
+;
+; SSE4-LABEL: shuffle_lshr_2v2i64_bitcast:
+; SSE4: # %bb.0:
+; SSE4-NEXT: psrlq $63, %xmm0
+; SSE4-NEXT: psrlq $63, %xmm1
+; SSE4-NEXT: packusdw %xmm1, %xmm0
+; SSE4-NEXT: ret{{[l|q]}}
;
; AVX-LABEL: shuffle_lshr_2v2i64_bitcast:
; AVX: # %bb.0:
; AVX-NEXT: vpsrlq $63, %xmm0, %xmm0
; AVX-NEXT: vpsrlq $63, %xmm1, %xmm1
-; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
%lshr0 = lshr <2 x i64> %a0, <i64 63, i64 63>
%lshr1 = lshr <2 x i64> %a1, <i64 63, i64 63>