[llvm] r367782 - [X86] SimplifyMultipleUseDemandedBits - Add target shuffle support
Author: rksimon
Date: Sun Aug 4 05:24:40 2019
New Revision: 367782
URL: http://llvm.org/viewvc/llvm-project?rev=367782&view=rev
Log:
[X86] SimplifyMultipleUseDemandedBits - Add target shuffle support
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vec_smulo.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=367782&r1=367781&r2=367782&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Aug 4 05:24:40 2019
@@ -34706,7 +34706,10 @@ bool X86TargetLowering::SimplifyDemanded
SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
SelectionDAG &DAG, unsigned Depth) const {
+ int NumElts = DemandedElts.getBitWidth();
unsigned Opc = Op.getOpcode();
+ EVT VT = Op.getValueType();
+
switch (Opc) {
case X86ISD::PINSRB:
case X86ISD::PINSRW: {
@@ -34721,6 +34724,49 @@ SDValue X86TargetLowering::SimplifyMulti
}
}
+ SmallVector<int, 16> ShuffleMask;
+ SmallVector<SDValue, 2> ShuffleOps;
+ if (VT.isSimple() && VT.isVector() &&
+ resolveTargetShuffleInputs(Op, ShuffleOps, ShuffleMask, DAG, Depth)) {
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ int NumOps = ShuffleOps.size();
+ if (ShuffleMask.size() == NumElts &&
+ llvm::all_of(ShuffleOps, [VT](SDValue V) {
+ return VT.getSizeInBits() == V.getValueSizeInBits();
+ })) {
+
+ // Bitmask that indicates which ops have only been accessed 'inline'.
+ APInt IdentityOp = APInt::getAllOnesValue(NumOps);
+ bool AllUndef = true;
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (SM_SentinelUndef == M || !DemandedElts[i])
+ continue;
+ AllUndef = false;
+ int Op = M / NumElts;
+ int Index = M % NumElts;
+ if (M < 0 || Index != i) {
+ IdentityOp.clearAllBits();
+ break;
+ }
+ IdentityOp &= APInt::getOneBitSet(NumOps, Op);
+ if (IdentityOp == 0)
+ break;
+ }
+ assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
+ "Multiple identity shuffles detected");
+
+ if (AllUndef)
+ return DAG.getUNDEF(VT);
+
+ for (int i = 0; i != NumOps; ++i)
+ if (IdentityOp[i])
+ return DAG.getBitcast(VT, ShuffleOps[i]);
+ }
+ }
+
return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
Op, DemandedBits, DemandedElts, DAG, Depth);
}
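In short, the new hook resolves a target shuffle into its inputs and mask via resolveTargetShuffleInputs, then checks whether every demanded, non-undef lane is an 'inline' element (Index == i) of exactly one same-sized operand; if so, the shuffle can be bypassed entirely and that operand returned, bitcast to the shuffle's type. Below is a minimal standalone sketch of that identity scan, using plain integers and std::vector in place of LLVM's APInt/SmallVector (findIdentityOperand, SentinelUndef and SentinelZero are illustrative names, not LLVM's):

#include <cstdint>
#include <vector>

constexpr int SentinelUndef = -1; // lane value is undefined
constexpr int SentinelZero = -2;  // lane value is forced to zero

// Returns the index of the single operand that supplies every demanded
// lane at its original position, or -1 if no such operand exists (the
// all-demanded-lanes-undef case also returns -1 here; the real code
// folds it to UNDEF instead). Mask semantics match a resolved target
// shuffle: result lane i reads lane (Mask[i] % NumElts) of operand
// (Mask[i] / NumElts). NumOps is tiny in practice (at most 2 inputs).
int findIdentityOperand(const std::vector<int> &Mask,
                        const std::vector<bool> &DemandedElts, int NumOps) {
  const int NumElts = static_cast<int>(Mask.size());
  uint64_t IdentityOps = (1ULL << NumOps) - 1; // all operands start live
  bool AllUndef = true;
  for (int i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M == SentinelUndef || !DemandedElts[i])
      continue; // an undef or undemanded lane constrains nothing
    AllUndef = false;
    // A zeroed lane, or a lane that moved, can't be read directly from
    // any operand.
    if (M < 0 || (M % NumElts) != i)
      return -1;
    IdentityOps &= 1ULL << (M / NumElts); // only this operand stays live
    if (IdentityOps == 0)
      return -1;
  }
  if (AllUndef)
    return -1;
  for (int i = 0; i != NumOps; ++i) // at most one bit can survive
    if (IdentityOps & (1ULL << i))
      return i;
  return -1;
}

As in the committed code, the per-lane intersection guarantees at most one operand survives, which is what the assert on IdentityOp.countPopulation() documents.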
Modified: llvm/trunk/test/CodeGen/X86/vec_smulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_smulo.ll?rev=367782&r1=367781&r2=367782&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_smulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_smulo.ll Sun Aug 4 05:24:40 2019
@@ -562,143 +562,145 @@ define <6 x i32> @smulo_v6i32(<6 x i32>
; SSE2-LABEL: smulo_v6i32:
; SSE2: # %bb.0:
; SSE2-NEXT: movq %rdi, %rax
-; SSE2-NEXT: movd %r8d, %xmm9
+; SSE2-NEXT: movd %r8d, %xmm8
; SSE2-NEXT: movd %ecx, %xmm0
-; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm9[0],xmm0[1],xmm9[1]
-; SSE2-NEXT: movd %edx, %xmm6
-; SSE2-NEXT: movd %esi, %xmm5
-; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
+; SSE2-NEXT: movd %edx, %xmm3
+; SSE2-NEXT: movd %esi, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
; SSE2-NEXT: movd {{.*#+}} xmm10 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
-; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1]
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSE2-NEXT: movd %r9d, %xmm12
-; SSE2-NEXT: movd {{.*#+}} xmm11 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpckldq {{.*#+}} xmm12 = xmm12[0],xmm11[0],xmm12[1],xmm11[1]
-; SSE2-NEXT: movd {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm12 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[1],xmm12[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSE2-NEXT: movd %r9d, %xmm13
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1]
+; SSE2-NEXT: movdqa %xmm13, %xmm11
+; SSE2-NEXT: punpckldq {{.*#+}} xmm11 = xmm11[0],xmm2[0],xmm11[1],xmm2[1]
+; SSE2-NEXT: movd {{.*#+}} xmm9 = mem[0],zero,zero,zero
+; SSE2-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
+; SSE2-NEXT: pmuludq %xmm7, %xmm13
+; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm9[0],xmm7[1],xmm9[1]
; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; SSE2-NEXT: pxor %xmm4, %xmm4
+; SSE2-NEXT: pxor %xmm5, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm5
+; SSE2-NEXT: pand %xmm6, %xmm5
; SSE2-NEXT: pxor %xmm0, %xmm0
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm0
-; SSE2-NEXT: pand %xmm5, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm1
-; SSE2-NEXT: pand %xmm3, %xmm1
-; SSE2-NEXT: paddd %xmm0, %xmm1
-; SSE2-NEXT: pmuludq %xmm5, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,3,2,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm10[0,0]
-; SSE2-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm9[0,0]
-; SSE2-NEXT: pmuludq %xmm7, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,3,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; SSE2-NEXT: psubd %xmm1, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,2,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
-; SSE2-NEXT: movdqa %xmm7, (%rcx)
-; SSE2-NEXT: psrad $31, %xmm7
-; SSE2-NEXT: pcmpeqd %xmm5, %xmm7
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm0, %xmm7
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm12, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm12, %xmm4
-; SSE2-NEXT: pand %xmm2, %xmm4
-; SSE2-NEXT: paddd %xmm1, %xmm4
-; SSE2-NEXT: pmuludq %xmm12, %xmm2
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
-; SSE2-NEXT: pmuludq %xmm8, %xmm11
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,3,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT: psubd %xmm4, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm0
+; SSE2-NEXT: pand %xmm1, %xmm0
+; SSE2-NEXT: paddd %xmm5, %xmm0
+; SSE2-NEXT: pmuludq %xmm6, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm12 = xmm12[0,0],xmm10[0,0]
+; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm8[0,0]
+; SSE2-NEXT: pmuludq %xmm12, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
+; SSE2-NEXT: psubd %xmm0, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE2-NEXT: movdqa %xmm0, (%rcx)
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm3
+; SSE2-NEXT: pand %xmm11, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm11, %xmm4
+; SSE2-NEXT: pand %xmm7, %xmm4
+; SSE2-NEXT: paddd %xmm3, %xmm4
+; SSE2-NEXT: pmuludq %xmm9, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm13[1,3,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; SSE2-NEXT: psubd %xmm4, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm11[0,2,2,3]
-; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT: movq %xmm2, 16(%rcx)
-; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm2
-; SSE2-NEXT: pxor %xmm0, %xmm2
-; SSE2-NEXT: movq %xmm2, 16(%rdi)
-; SSE2-NEXT: movdqa %xmm7, (%rdi)
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm13[0,2,2,3]
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT: movq %xmm3, 16(%rcx)
+; SSE2-NEXT: psrad $31, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: movq %xmm3, 16(%rdi)
+; SSE2-NEXT: movdqa %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSSE3-LABEL: smulo_v6i32:
; SSSE3: # %bb.0:
; SSSE3-NEXT: movq %rdi, %rax
-; SSSE3-NEXT: movd %r8d, %xmm9
+; SSSE3-NEXT: movd %r8d, %xmm8
; SSSE3-NEXT: movd %ecx, %xmm0
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm9[0],xmm0[1],xmm9[1]
-; SSSE3-NEXT: movd %edx, %xmm6
-; SSSE3-NEXT: movd %esi, %xmm5
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm0[0]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
+; SSSE3-NEXT: movd %edx, %xmm3
+; SSSE3-NEXT: movd %esi, %xmm6
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm3[0],xmm6[1],xmm3[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm6 = xmm6[0],xmm0[0]
; SSSE3-NEXT: movd {{.*#+}} xmm10 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1]
-; SSSE3-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
-; SSSE3-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm7[0],xmm3[1],xmm7[1]
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
-; SSSE3-NEXT: movd %r9d, %xmm12
-; SSSE3-NEXT: movd {{.*#+}} xmm11 = mem[0],zero,zero,zero
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm12 = xmm12[0],xmm11[0],xmm12[1],xmm11[1]
-; SSSE3-NEXT: movd {{.*#+}} xmm8 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movd {{.*#+}} xmm12 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm12[0],xmm1[1],xmm12[1]
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; SSSE3-NEXT: movd %r9d, %xmm13
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1]
+; SSSE3-NEXT: movdqa %xmm13, %xmm11
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm11 = xmm11[0],xmm2[0],xmm11[1],xmm2[1]
+; SSSE3-NEXT: movd {{.*#+}} xmm9 = mem[0],zero,zero,zero
+; SSSE3-NEXT: movd {{.*#+}} xmm7 = mem[0],zero,zero,zero
+; SSSE3-NEXT: pmuludq %xmm7, %xmm13
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm9[0],xmm7[1],xmm9[1]
; SSSE3-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; SSSE3-NEXT: pxor %xmm4, %xmm4
+; SSSE3-NEXT: pxor %xmm5, %xmm5
+; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5
+; SSSE3-NEXT: pand %xmm6, %xmm5
; SSSE3-NEXT: pxor %xmm0, %xmm0
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm0
-; SSSE3-NEXT: pand %xmm5, %xmm0
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm5, %xmm1
-; SSSE3-NEXT: pand %xmm3, %xmm1
-; SSSE3-NEXT: paddd %xmm0, %xmm1
-; SSSE3-NEXT: pmuludq %xmm5, %xmm3
-; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm3[1,3,2,3]
-; SSSE3-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,0],xmm10[0,0]
-; SSSE3-NEXT: shufps {{.*#+}} xmm6 = xmm6[0,0],xmm9[0,0]
-; SSSE3-NEXT: pmuludq %xmm7, %xmm6
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[1,3,2,3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm0[0],xmm5[1],xmm0[1]
-; SSSE3-NEXT: psubd %xmm1, %xmm5
-; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm3[0,2,2,3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm6[0,2,2,3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1]
-; SSSE3-NEXT: movdqa %xmm7, (%rcx)
-; SSSE3-NEXT: psrad $31, %xmm7
-; SSSE3-NEXT: pcmpeqd %xmm5, %xmm7
-; SSSE3-NEXT: pcmpeqd %xmm0, %xmm0
-; SSSE3-NEXT: pxor %xmm0, %xmm7
-; SSSE3-NEXT: pxor %xmm1, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm2, %xmm1
-; SSSE3-NEXT: pand %xmm12, %xmm1
-; SSSE3-NEXT: pcmpgtd %xmm12, %xmm4
-; SSSE3-NEXT: pand %xmm2, %xmm4
-; SSSE3-NEXT: paddd %xmm1, %xmm4
-; SSSE3-NEXT: pmuludq %xmm12, %xmm2
-; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,3,2,3]
-; SSSE3-NEXT: pmuludq %xmm8, %xmm11
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm11[1,3,2,3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSSE3-NEXT: psubd %xmm4, %xmm1
+; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
+; SSSE3-NEXT: pand %xmm1, %xmm0
+; SSSE3-NEXT: paddd %xmm5, %xmm0
+; SSSE3-NEXT: pmuludq %xmm6, %xmm1
+; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm1[1,3,2,3]
+; SSSE3-NEXT: shufps {{.*#+}} xmm12 = xmm12[0,0],xmm10[0,0]
+; SSSE3-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,0],xmm8[0,0]
+; SSSE3-NEXT: pmuludq %xmm12, %xmm3
+; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = xmm3[1,3,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1]
+; SSSE3-NEXT: psubd %xmm0, %xmm5
+; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSSE3-NEXT: movdqa %xmm0, (%rcx)
+; SSSE3-NEXT: psrad $31, %xmm0
+; SSSE3-NEXT: pcmpeqd %xmm5, %xmm0
+; SSSE3-NEXT: pcmpeqd %xmm1, %xmm1
+; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm7, %xmm3
+; SSSE3-NEXT: pand %xmm11, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm11, %xmm4
+; SSSE3-NEXT: pand %xmm7, %xmm4
+; SSSE3-NEXT: paddd %xmm3, %xmm4
+; SSSE3-NEXT: pmuludq %xmm9, %xmm2
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,3,2,3]
+; SSSE3-NEXT: pshufd {{.*#+}} xmm5 = xmm13[1,3,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
+; SSSE3-NEXT: psubd %xmm4, %xmm5
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm11[0,2,2,3]
-; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSSE3-NEXT: movq %xmm2, 16(%rcx)
-; SSSE3-NEXT: psrad $31, %xmm2
-; SSSE3-NEXT: pcmpeqd %xmm1, %xmm2
-; SSSE3-NEXT: pxor %xmm0, %xmm2
-; SSSE3-NEXT: movq %xmm2, 16(%rdi)
-; SSSE3-NEXT: movdqa %xmm7, (%rdi)
+; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm13[0,2,2,3]
+; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSSE3-NEXT: movq %xmm3, 16(%rcx)
+; SSSE3-NEXT: psrad $31, %xmm3
+; SSSE3-NEXT: pcmpeqd %xmm5, %xmm3
+; SSSE3-NEXT: pxor %xmm1, %xmm3
+; SSSE3-NEXT: movq %xmm3, 16(%rdi)
+; SSSE3-NEXT: movdqa %xmm0, (%rdi)
; SSSE3-NEXT: retq
;
; SSE41-LABEL: smulo_v6i32:
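The smulo_v6i32 churn above is this fold viewed from PMULUDQ: each pmuludq handling the upper v2i32 half appears to demand only lane 0 of its interleaved operand (only the low 64-bit product is ever stored), and lane 0 of a punpckldq interleave sits unmoved in the first input, so the interleave can be bypassed for that use, which is why pmuludq %xmm7, %xmm13 now fires on the raw movd values before the punpckldq that used to feed it. A tiny self-contained illustration of that lane accounting (hypothetical code, not LLVM's):

#include <cstdio>
#include <vector>

int main() {
  // A punpckldq-style interleave of two 4 x i32 operands: mask values
  // 0..3 index operand 0 and 4..7 index operand 1.
  std::vector<int> Mask = {0, 4, 1, 5};
  // Only lane 0 is demanded, e.g. by a PMULUDQ whose upper product is
  // never stored.
  std::vector<bool> Demanded = {true, false, false, false};
  const int NumElts = 4;
  for (int i = 0; i != NumElts; ++i) {
    if (!Demanded[i])
      continue;
    int OpIdx = Mask[i] / NumElts, Index = Mask[i] % NumElts;
    std::printf("lane %d <- op%d[%d]%s\n", i, OpIdx, Index,
                Index == i ? " (inline)" : "");
  }
  // Every demanded lane is an inline element of operand 0, so the
  // interleave can be replaced by operand 0 for this use.
  return 0;
}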
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll?rev=367782&r1=367781&r2=367782&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll Sun Aug 4 05:24:40 2019
@@ -1589,9 +1589,8 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmullw %xmm1, %xmm0
-; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,xmm1[6],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero
; SSE41-NEXT: pmullw %xmm0, %xmm1
; SSE41-NEXT: pextrb $0, %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
@@ -1603,8 +1602,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
@@ -1616,8 +1614,7 @@ define i8 @test_v4i8(<4 x i8> %a0) {
; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,u,u,u,u,u,u,u,u,u,u,u,u]
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
@@ -1650,15 +1647,13 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmullw %xmm1, %xmm0
-; SSE41-NEXT: movdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm1, %xmm0
-; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; SSE41-NEXT: pmullw %xmm2, %xmm0
-; SSE41-NEXT: pshufb %xmm1, %xmm0
+; SSE41-NEXT: pmullw %xmm1, %xmm0
; SSE41-NEXT: movdqa %xmm0, %xmm1
-; SSE41-NEXT: psrlw $8, %xmm1
+; SSE41-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,xmm1[6],zero,xmm1[10],zero,xmm1[14],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero,xmm1[u],zero
; SSE41-NEXT: pmullw %xmm0, %xmm1
; SSE41-NEXT: pextrb $0, %xmm1, %eax
; SSE41-NEXT: # kill: def $al killed $al killed $eax
@@ -1670,14 +1665,12 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX-NEXT: vpmullw %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX-NEXT: vpmullw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpextrb $0, %xmm0, %eax
; AVX-NEXT: # kill: def $al killed $al killed $eax
@@ -1689,14 +1682,12 @@ define i8 @test_v8i8(<8 x i8> %a0) {
; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
+; AVX512-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX512-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; AVX512-NEXT: vpmullw %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
+; AVX512-NEXT: vpmullw %xmm0, %xmm1, %xmm0
+; AVX512-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2],zero,xmm0[6],zero,xmm0[10],zero,xmm0[14],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero,xmm0[u],zero
; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vpextrb $0, %xmm0, %eax
; AVX512-NEXT: # kill: def $al killed $al killed $eax
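The vector-reduce-mul-widen changes are the same fold seen from the demanded-bits side. The reduction tail used to pack the i16 products to bytes (pshufb [0,2,4,6,...]) and then extract the odd packed bytes with psrlw $8; since every byte the shift produces maps straight through the pack, the pair now folds to a single pshufb that reads bytes 2 and 6 (plus 10 and 14 in the v8i8 case) of the unpacked products, zeroing the odd bytes just as the shift did. The pack also disappears from the other pmullw operand, seemingly because only byte 0 of the final product is demanded by pextrb and the pack leaves byte 0 in place. A small hypothetical demonstration of the v4i8 mask composition (plain C++, not LLVM code):

#include <cstdio>
#include <vector>

int main() {
  const int Zero = -2, Undef = -1;
  // pshufb [0,2,4,6,u,...]: packs the low byte of each i16 product.
  std::vector<int> Pack(16, Undef);
  for (int k = 0; k != 4; ++k)
    Pack[k] = 2 * k;
  // psrlw $8 expressed as a byte shuffle: byte 2k <- byte 2k+1, and
  // byte 2k+1 <- zero.
  std::vector<int> Srl(16);
  for (int k = 0; k != 8; ++k) {
    Srl[2 * k] = 2 * k + 1;
    Srl[2 * k + 1] = Zero;
  }
  // Compose: for each byte the shift produces, look through the pack.
  for (int i = 0; i != 16; ++i) {
    int M = Srl[i];
    int C = (M < 0) ? M : Pack[M];
    if (C == Zero)
      std::printf("byte %2d <- zero\n", i);
    else if (C == Undef)
      std::printf("byte %2d <- undef\n", i);
    else
      std::printf("byte %2d <- src[%d]\n", i, C);
  }
  // Prints byte 0 <- src[2] and byte 2 <- src[6] (the rest zero/undef),
  // matching the new single pshufb mask xmm0[2],zero,xmm0[6],zero,...
  return 0;
}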