[llvm] 1300a4f - Revert rG23cb32c6d5bda0919cc1ef129917ceb2dbf1b1b8 "[X86] combineX86ShufflesRecursively - treat ISD::TRUNCATE as faux shuffle"
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 12 06:46:18 PST 2023
Author: Simon Pilgrim
Date: 2023-02-12T14:46:08Z
New Revision: 1300a4fdae35468d6e1fe41a9fa2b2468fb6fe9d
URL: https://github.com/llvm/llvm-project/commit/1300a4fdae35468d6e1fe41a9fa2b2468fb6fe9d
DIFF: https://github.com/llvm/llvm-project/commit/1300a4fdae35468d6e1fe41a9fa2b2468fb6fe9d.diff
LOG: Revert rG23cb32c6d5bda0919cc1ef129917ceb2dbf1b1b8 "[X86] combineX86ShufflesRecursively - treat ISD::TRUNCATE as faux shuffle"
This is causing a miscompile - waiting on a regression test from @bkramer
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 52f323d319353..80b2da686232f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -40287,16 +40287,6 @@ static SDValue combineX86ShufflesRecursively(
OpMask.assign(NumElts, SM_SentinelUndef);
std::iota(OpMask.begin(), OpMask.end(), ExtractIdx);
OpZero = OpUndef = APInt::getNullValue(NumElts);
- } else if (Op.getOpcode() == ISD::TRUNCATE &&
- (RootSizeInBits % Op.getOperand(0).getValueSizeInBits()) == 0) {
- SDValue SrcVec = Op.getOperand(0);
- unsigned Scale = SrcVec.getValueSizeInBits() / VT.getSizeInBits();
- unsigned NumElts = VT.getVectorNumElements();
- OpInputs.assign({SrcVec});
- OpMask.assign(Scale * NumElts, SM_SentinelUndef);
- OpZero = OpUndef = APInt::getNullValue(Scale * NumElts);
- for (unsigned I = 0; I != NumElts; ++I)
- OpMask[I] = I * Scale;
} else {
return SDValue();
}
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index 8596bb56e1e2d..7f48f93bf7771 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -131,25 +131,32 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
; AVX256VL-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX256VL: # %bb.0:
; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX256VL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX256VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; AVX256VL-NEXT: vpmovsxbd %xmm1, %ymm1
+; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k2
; AVX256VL-NEXT: vpmovsxbd %xmm0, %ymm0
-; AVX256VL-NEXT: vptestmd %ymm0, %ymm0, %k2
+; AVX256VL-NEXT: vptestmd %ymm0, %ymm0, %k3
; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
-; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k3} {z}
; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
-; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} {z}
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k2} {z}
; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
; AVX256VL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; AVX256VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3],ymm2[4,5],ymm1[6],ymm2[7]
-; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17]
-; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm2
-; AVX256VL-NEXT: vpslld $31, %ymm2, %ymm2
-; AVX256VL-NEXT: vptestmd %ymm2, %ymm2, %k1
-; AVX256VL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13],zero,zero,ymm1[8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23],zero,zero,ymm1[30,31,16,17]
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} {z}
+; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
+; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,1,1,1]
+; AVX256VL-NEXT: vpternlogq $220, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2
+; AVX256VL-NEXT: vpmovsxwd %xmm2, %ymm1
+; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
+; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX256VL-NEXT: vextracti128 $1, %ymm2, %xmm1
; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0
More information about the llvm-commits mailing list