[llvm-branch-commits] [llvm] be653f6 - [X86] combineX86ShuffleChain - don't fold to truncate(concat(V1, V2)) if it was already a PACK op
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed May 18 16:42:54 PDT 2022
Author: Simon Pilgrim
Date: 2022-05-18T16:34:09-07:00
New Revision: be653f6292e76b82d3532e6fba0e15ce633bd80a
URL: https://github.com/llvm/llvm-project/commit/be653f6292e76b82d3532e6fba0e15ce633bd80a
DIFF: https://github.com/llvm/llvm-project/commit/be653f6292e76b82d3532e6fba0e15ce633bd80a.diff
LOG: [X86] combineX86ShuffleChain - don't fold to truncate(concat(V1,V2)) if it was already a PACK op
Fixes #55050
(cherry picked from commit e8305c0b8f492272446d517e67d451f390d35dbe)
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 682932b8f3e66..8bb7e81e19bbd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -37558,7 +37558,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(RootVT.is128BitVector() && Subtarget.hasVLX())) &&
(MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) {
- if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE)
+ // Bail if this was already a truncation or PACK node.
+ // We sometimes fail to match PACK if we demand known undef elements.
+ if (Depth == 0 && (Root.getOpcode() == ISD::TRUNCATE ||
+ Root.getOpcode() == X86ISD::PACKSS ||
+ Root.getOpcode() == X86ISD::PACKUS))
return SDValue(); // Nothing to do!
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
index be65effbc7241..3be4e6777d6fe 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
@@ -174,3 +174,47 @@ define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) {
%sel = select <8 x i1> %mask, <8 x i32> %shl, <8 x i32> zeroinitializer
ret <8 x i32> %sel
}
+
+define i64 @PR55050() {
+; X86-LABEL: PR55050:
+; X86: # %bb.0: # %entry
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: testb %al, %al
+; X86-NEXT: jne .LBB10_2
+; X86-NEXT: # %bb.1: # %if
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: .LBB10_2: # %exit
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: retl
+;
+; X64-LABEL: PR55050:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb %al, %al
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+entry:
+ %i275 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> undef, <16 x i8> zeroinitializer)
+ %i277 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> undef, <16 x i8> zeroinitializer)
+ br i1 undef, label %exit, label %if
+
+if:
+ %i298 = bitcast <2 x i64> %i275 to <4 x i32>
+ %i299 = bitcast <2 x i64> %i277 to <4 x i32>
+ %i300 = shufflevector <4 x i32> %i298, <4 x i32> %i299, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+ %i339 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %i300, <4 x i32> undef)
+ %i354 = shufflevector <8 x i16> %i339, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef>
+ %i356 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %i354, <8 x i16> undef)
+ %i357 = shufflevector <16 x i8> %i356, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 5, i32 4, i32 16, i32 2, i32 1, i32 0, i32 16, i32 10, i32 9, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16>
+ %i361 = extractelement <16 x i8> %i357, i64 8
+ %i360 = and i8 %i361, 63
+ %i379 = zext i8 %i360 to i64
+ br label %exit
+
+exit:
+ %res = phi i64 [ %i379, %if ], [ 0, %entry ]
+ ret i64 %res
+}
+declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)
More information about the llvm-branch-commits mailing list