[llvm] r315807 - [X86][SSE] Support combining AND(EXTRACT(SHUF(X)), C) -> EXTRACT(SHUF(X))
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Oct 14 08:01:36 PDT 2017
Author: rksimon
Date: Sat Oct 14 08:01:36 2017
New Revision: 315807
URL: http://llvm.org/viewvc/llvm-project?rev=315807&view=rev
Log:
[X86][SSE] Support combining AND(EXTRACT(SHUF(X)), C) -> EXTRACT(SHUF(X))
If we are applying a byte mask to a value extracted from a shuffle, see if we can combine the mask into the shuffle.
Fixes the last issue with PR22415
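
To see why the fold is sound: AND-ing an extracted lane with a constant that masks whole bytes is the same as folding zeroing entries into the byte shuffle and extracting afterwards. Below is a minimal standalone C++ sketch of that equivalence (not LLVM code; pshufb/extractLane are hypothetical helpers modelling PSHUFB semantics, and the constants mirror the mask_zzz3_v16i8 test further down):

#include <array>
#include <cassert>
#include <cstdint>
#include <cstring>

// PSHUFB semantics: a mask byte with the high bit set writes 0,
// otherwise the low 4 bits select a source byte.
using V16 = std::array<uint8_t, 16>;

static V16 pshufb(const V16 &Src, const V16 &Mask) {
  V16 R{};
  for (int i = 0; i != 16; ++i)
    R[i] = (Mask[i] & 0x80) ? 0 : Src[Mask[i] & 0x0F];
  return R;
}

// Extract 32-bit lane Idx (little-endian), as EXTRACT_VECTOR_ELT would.
static uint32_t extractLane(const V16 &V, unsigned Idx) {
  uint32_t R;
  std::memcpy(&R, V.data() + 4 * Idx, 4);
  return R;
}

int main() {
  V16 X;
  for (int i = 0; i != 16; ++i)
    X[i] = uint8_t(17 * i + 3); // arbitrary test data

  // Before: shuffle, extract lane 3, then mask with 0xFF000000.
  V16 M = {0, 2, 4, 6, 8, 10, 12, 14, 0, 2, 4, 6, 8, 10, 12, 14};
  uint32_t Before = extractLane(pshufb(X, M), 3) & 0xFF000000u;

  // After: fold the three masked-off bytes into the shuffle as zeroing
  // entries (0x80) and drop the scalar AND entirely.
  V16 MC = M;
  MC[12] = MC[13] = MC[14] = 0x80;
  uint32_t After = extractLane(pshufb(X, MC), 3);

  assert(Before == After);
  return 0;
}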
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=315807&r1=315806&r2=315807&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Oct 14 08:01:36 2017
@@ -32442,6 +32442,45 @@ static SDValue combineAnd(SDNode *N, Sel
}
}
+  // Attempt to combine a scalar bitmask AND with an extracted shuffle.
+  if ((VT.getScalarSizeInBits() % 8) == 0 &&
+      N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      isa<ConstantSDNode>(N->getOperand(0).getOperand(1))) {
+    SDValue BitMask = N->getOperand(1);
+    SDValue SrcVec = N->getOperand(0).getOperand(0);
+    EVT SrcVecVT = SrcVec.getValueType();
+
+    // Check that the constant bitmask masks whole bytes.
+    APInt UndefElts;
+    SmallVector<APInt, 64> EltBits;
+    if (VT == SrcVecVT.getScalarType() &&
+        N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
+        getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
+        llvm::all_of(EltBits, [](APInt M) {
+          return M.isNullValue() || M.isAllOnesValue();
+        })) {
+      unsigned NumElts = SrcVecVT.getVectorNumElements();
+      unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
+      unsigned Idx = N->getOperand(0).getConstantOperandVal(1);
+
+      // Create a root shuffle mask from the byte mask and the extracted index.
+      SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
+      for (unsigned i = 0; i != Scale; ++i) {
+        if (UndefElts[i])
+          continue;
+        int VecIdx = Scale * Idx + i;
+        ShuffleMask[VecIdx] =
+            EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx;
+      }
+
+      if (SDValue Shuffle = combineX86ShufflesRecursively(
+              {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 2,
+              /*HasVarMask*/ false, DAG, DCI, Subtarget))
+        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
+                           N->getOperand(0).getOperand(1));
+    }
+  }
+
return SDValue();
}
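
For concreteness, the following standalone C++ sketch (not LLVM code; buildRootMask is a hypothetical stand-in, and the UndefElts handling is omitted for brevity) shows the root byte-level shuffle mask the loop above builds for the mask_zzz3_v16i8 test below, i.e. extracting element 3 of a v4i32 and AND-ing with 0xFF000000. Bytes outside the extracted element are left undef, so combineX86ShufflesRecursively is free to merge the mask with the PSHUFB feeding the extract -- which is why the scalar AND disappears from the checks below:

#include <iostream>
#include <vector>

// -1 stands in for SM_SentinelUndef, -2 for SM_SentinelZero.
constexpr int SentinelUndef = -1;
constexpr int SentinelZero = -2;

// Build the byte-level "root" mask: only the Scale bytes of element Idx
// are defined; each is either kept in place or forced to zero.
static std::vector<int> buildRootMask(unsigned NumElts, unsigned Scale,
                                      unsigned Idx,
                                      const std::vector<bool> &ByteIsZero) {
  std::vector<int> ShuffleMask(NumElts * Scale, SentinelUndef);
  for (unsigned i = 0; i != Scale; ++i) {
    int VecIdx = Scale * Idx + i;
    ShuffleMask[VecIdx] = ByteIsZero[i] ? SentinelZero : VecIdx;
  }
  return ShuffleMask;
}

int main() {
  // 0xFF000000 masks little-endian bytes {zero, zero, zero, keep}.
  auto Mask = buildRootMask(/*NumElts=*/4, /*Scale=*/4, /*Idx=*/3,
                            {true, true, true, false});
  for (int M : Mask)
    std::cout << M << ' '; // twelve -1s, then -2 -2 -2 15
  std::cout << '\n';
}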
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll?rev=315807&r1=315806&r2=315807&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-ssse3.ll Sat Oct 14 08:01:36 2017
@@ -750,23 +750,20 @@ define <16 x i8> @constant_fold_pshufb_2
define i32 @mask_zzz3_v16i8(<16 x i8> %a0) {
; SSSE3-LABEL: mask_zzz3_v16i8:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,10,12,14,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,zero,zero,xmm0[14,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: andl $-16777216, %eax # imm = 0xFF000000
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_zzz3_v16i8:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm0[14]
; SSE41-NEXT: pextrd $3, %xmm0, %eax
-; SSE41-NEXT: andl $-16777216, %eax # imm = 0xFF000000
; SSE41-NEXT: retq
;
; AVX-LABEL: mask_zzz3_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,zero,zero,xmm0[14]
; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: andl $-16777216, %eax # imm = 0xFF000000
; AVX-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
%2 = bitcast <16 x i8> %1 to <4 x i32>
@@ -778,23 +775,20 @@ define i32 @mask_zzz3_v16i8(<16 x i8> %a
define i32 @mask_z1z3_v16i8(<16 x i8> %a0) {
; SSSE3-LABEL: mask_z1z3_v16i8:
; SSSE3: # BB#0:
-; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,10,12,14,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = zero,xmm0[10],zero,xmm0[14,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT: movd %xmm0, %eax
-; SSSE3-NEXT: andl $-16711936, %eax # imm = 0xFF00FF00
; SSSE3-NEXT: retq
;
; SSE41-LABEL: mask_z1z3_v16i8:
; SSE41: # BB#0:
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,xmm0[10],zero,xmm0[14]
; SSE41-NEXT: pextrd $3, %xmm0, %eax
-; SSE41-NEXT: andl $-16711936, %eax # imm = 0xFF00FF00
; SSE41-NEXT: retq
;
; AVX-LABEL: mask_z1z3_v16i8:
; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,0,2,4,6,8,10,12,14]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,u,u,u,u,u,u],zero,xmm0[10],zero,xmm0[14]
; AVX-NEXT: vpextrd $3, %xmm0, %eax
-; AVX-NEXT: andl $-16711936, %eax # imm = 0xFF00FF00
; AVX-NEXT: retq
%1 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14, i8 0, i8 2, i8 4, i8 6, i8 8, i8 10, i8 12, i8 14>)
%2 = bitcast <16 x i8> %1 to <4 x i32>
@@ -806,16 +800,14 @@ define i32 @mask_z1z3_v16i8(<16 x i8> %a
define i32 @PR22415(double %a0) {
; SSE-LABEL: PR22415:
; SSE: # BB#0:
-; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; SSE-NEXT: movd %xmm0, %eax
-; SSE-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
; SSE-NEXT: retq
;
; AVX-LABEL: PR22415:
; AVX: # BB#0:
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: vmovd %xmm0, %eax
-; AVX-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
; AVX-NEXT: retq
%1 = bitcast double %a0 to <8 x i8>
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 undef>