[llvm] 09fd8f0 - [X86] matchBinaryPermuteShuffle - match AVX512 "cross lane" SHLDQ/SRLDQ style patterns using VALIGN (#140538)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 20 08:08:00 PDT 2025
Author: Simon Pilgrim
Date: 2025-05-20T16:07:56+01:00
New Revision: 09fd8f0093b8ff489d76285d893be152e4ca4c24
URL: https://github.com/llvm/llvm-project/commit/09fd8f0093b8ff489d76285d893be152e4ca4c24
DIFF: https://github.com/llvm/llvm-project/commit/09fd8f0093b8ff489d76285d893be152e4ca4c24.diff
LOG: [X86] matchBinaryPermuteShuffle - match AVX512 "cross lane" SHLDQ/SRLDQ style patterns using VALIGN (#140538)
Very similar to what we do in lowerShuffleAsVALIGN.
I've updated isTargetShuffleEquivalent to correctly handle SM_SentinelZero in the expected shuffle mask, but it only allows an exact match (or an undef element in the test mask); it can't be used to match zero elements via MaskedVectorIsZero.
Noticed while working on #140516
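For illustration, here is a minimal standalone sketch (not the committed code) of how the new ZeroLo/ZeroHi shift masks map to a VALIGN immediate. It uses plain std:: containers instead of the SelectionDAG types; the helper name matchValignShift and the -1 zero sentinel are invented for this example.

// Minimal sketch of the new VALIGN "shift in zeros" matching, using plain
// std:: types. The -1 sentinel stands in for SM_SentinelZero and the helper
// name is invented; see matchBinaryPermuteShuffle for the real logic.
#include <cstdio>
#include <numeric>
#include <optional>
#include <vector>

constexpr int kZero = -1; // stand-in for SM_SentinelZero

// If Mask is a contiguous run of V1 elements with zeros shifted in at one
// end (a cross-lane VSHLDQ/VSRLDQ style pattern), return the VALIGN
// rotation immediate, otherwise std::nullopt.
std::optional<unsigned> matchValignShift(const std::vector<int> &Mask) {
  unsigned NumElts = Mask.size();
  unsigned ZeroLo = 0, ZeroHi = 0;
  while (ZeroLo < NumElts && Mask[ZeroLo] == kZero)
    ++ZeroLo;
  while (ZeroHi < NumElts && Mask[NumElts - 1 - ZeroHi] == kZero)
    ++ZeroHi;
  if (ZeroLo + ZeroHi >= NumElts) // mirrors the assert in the patch
    return std::nullopt;

  if (ZeroLo) {
    // Expect [Z .. Z, 0, 1, 2, ...]: VALIGN(V1, Zero, NumElts - ZeroLo).
    std::vector<int> Expected(NumElts, kZero);
    std::iota(Expected.begin() + ZeroLo, Expected.end(), 0);
    if (Mask == Expected)
      return NumElts - ZeroLo;
  }
  if (ZeroHi) {
    // Expect [ZeroHi, .., NumElts-1, Z .. Z]: VALIGN(Zero, V1, ZeroHi).
    std::vector<int> Expected(NumElts, kZero);
    std::iota(Expected.begin(), Expected.end() - ZeroHi, (int)ZeroHi);
    if (Mask == Expected)
      return ZeroHi;
  }
  return std::nullopt;
}

int main() {
  // Mask from the combine_vpermt2var_8i64_as_zero_valignq test below:
  // element 0 is zero, elements 1..7 come from V1[0..6].
  std::vector<int> Mask = {kZero, 0, 1, 2, 3, 4, 5, 6};
  if (auto Imm = matchValignShift(Mask))
    std::printf("valignq immediate: %u\n", *Imm); // prints 7
  return 0;
}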
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1ee49a4f8a97a..4fb028131653f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -10096,7 +10096,10 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
if (Size != (int)ExpectedMask.size())
return false;
assert(llvm::all_of(ExpectedMask,
- [Size](int M) { return isInRange(M, 0, 2 * Size); }) &&
+ [Size](int M) {
+ return M == SM_SentinelZero ||
+ isInRange(M, 0, 2 * Size);
+ }) &&
"Illegal target shuffle mask");
// Check for out-of-range target shuffle mask indices.
@@ -10119,6 +10122,9 @@ static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
int ExpectedIdx = ExpectedMask[i];
if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
continue;
+ // If we failed to match an expected SM_SentinelZero then early out.
+ if (ExpectedIdx < 0)
+ return false;
if (MaskIdx == SM_SentinelZero) {
// If we need this expected index to be a zero element, then update the
// relevant zero mask and perform the known bits at the end to minimize
@@ -39594,18 +39600,46 @@ static bool matchBinaryPermuteShuffle(
((MaskVT.is128BitVector() && Subtarget.hasVLX()) ||
(MaskVT.is256BitVector() && Subtarget.hasVLX()) ||
(MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
+ MVT AlignVT = MVT::getVectorVT(MVT::getIntegerVT(EltSizeInBits),
+ MaskVT.getSizeInBits() / EltSizeInBits);
if (!isAnyZero(Mask)) {
int Rotation = matchShuffleAsElementRotate(V1, V2, Mask);
if (0 < Rotation) {
Shuffle = X86ISD::VALIGN;
- if (EltSizeInBits == 64)
- ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64);
- else
- ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32);
+ ShuffleVT = AlignVT;
PermuteImm = Rotation;
return true;
}
}
+ // See if we can use VALIGN as a cross-lane version of VSHLDQ/VSRLDQ.
+ unsigned ZeroLo = Zeroable.countr_one();
+ unsigned ZeroHi = Zeroable.countl_one();
+ assert((ZeroLo + ZeroHi) < NumMaskElts && "Zeroable shuffle detected");
+ if (ZeroLo) {
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
+ std::iota(ShiftMask.begin() + ZeroLo, ShiftMask.end(), 0);
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
+ V2 = getZeroVector(AlignVT, Subtarget, DAG, DL);
+ Shuffle = X86ISD::VALIGN;
+ ShuffleVT = AlignVT;
+ PermuteImm = NumMaskElts - ZeroLo;
+ return true;
+ }
+ }
+ if (ZeroHi) {
+ SmallVector<int, 16> ShiftMask(NumMaskElts, SM_SentinelZero);
+ std::iota(ShiftMask.begin(), ShiftMask.begin() + NumMaskElts - ZeroHi,
+ ZeroHi);
+ if (isTargetShuffleEquivalent(MaskVT, Mask, ShiftMask, DAG, V1)) {
+ V2 = V1;
+ V1 = getZeroVector(AlignVT, Subtarget, DAG, DL);
+ Shuffle = X86ISD::VALIGN;
+ ShuffleVT = AlignVT;
+ PermuteImm = ZeroHi;
+ return true;
+ }
+ }
}
// Attempt to match against PALIGNR byte rotate.
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
index b3b90b5f51501..68967c2ce6536 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512f.ll
@@ -812,10 +812,8 @@ define <8 x i64> @combine_vpermt2var_8i64_as_valignq(<8 x i64> %x0, <8 x i64> %x
define <8 x i64> @combine_vpermt2var_8i64_as_valignq_zero(<8 x i64> %x0) {
; CHECK-LABEL: combine_vpermt2var_8i64_as_valignq_zero:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpmovsxbq {{.*#+}} zmm2 = [15,0,1,2,3,4,5,6]
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpermt2q %zmm0, %zmm2, %zmm1
-; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: valignq {{.*#+}} zmm0 = zmm0[7],zmm1[0,1,2,3,4,5,6]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 15, i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6>, <8 x i64> zeroinitializer, <8 x i64> %x0, i8 -1)
ret <8 x i64> %res0
@@ -825,8 +823,7 @@ define <8 x i64> @combine_vpermt2var_8i64_as_zero_valignq(<8 x i64> %x0) {
; CHECK-LABEL: combine_vpermt2var_8i64_as_zero_valignq:
; CHECK: # %bb.0:
; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; CHECK-NEXT: vpmovsxbq {{.*#+}} zmm2 = [15,0,1,2,3,4,5,6]
-; CHECK-NEXT: vpermt2q %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: valignq {{.*#+}} zmm0 = zmm1[7],zmm0[0,1,2,3,4,5,6]
; CHECK-NEXT: ret{{[l|q]}}
%res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 15, i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6>, <8 x i64> %x0, <8 x i64> zeroinitializer, i8 -1)
ret <8 x i64> %res0