[llvm] r359686 - [X86][SSE] Add demanded elts support for X86ISD::PMULDQ\PMULUDQ
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed May 1 07:50:50 PDT 2019
Author: rksimon
Date: Wed May 1 07:50:50 2019
New Revision: 359686
URL: http://llvm.org/viewvc/llvm-project?rev=359686&view=rev
Log:
[X86][SSE] Add demanded elts support for X86ISD::PMULDQ\PMULUDQ
Add handling to SimplifyDemandedVectorEltsForTargetNode and SimplifyDemandedBitsForTargetNode.
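
For context, a rough sketch of the kind of case this helps (hand-written illustration, not one of the modified tests; the function name and shape are made up). On AVX2 a 64-bit vector multiply is lowered via PMULUDQ/shift/add sequences, and when only a single lane of the result is ever read, the new demanded-elts handling lets that be propagated through the PMULUDQ nodes:

  ; Only element 0 of the multiply result is demanded, so the backend can
  ; drop work on the upper lanes of the PMULUDQ-based expansion.
  define i64 @mul_low_lane(<4 x i64> %a, <4 x i64> %b) {
    %m = mul <4 x i64> %a, %b
    %r = extractelement <4 x i64> %m, i32 0
    ret i64 %r
  }

Compiled with something like llc -mtriple=x86_64-- -mattr=+avx2, this is the same pattern exercised by the vector-reduce-mul tests below, where the ymm/zmm PMULUDQ chains can now shrink to xmm once only the low lane is demanded.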
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll
llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=359686&r1=359685&r2=359686&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 1 07:50:50 2019
@@ -33237,6 +33237,22 @@ bool X86TargetLowering::SimplifyDemanded
// Handle special case opcodes.
switch (Opc) {
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ: {
+ APInt LHSUndef, LHSZero;
+ APInt RHSUndef, RHSZero;
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO,
+ Depth + 1))
+ return true;
+ // Multiply by zero.
+ KnownZero = LHSZero | RHSZero;
+ break;
+ }
case X86ISD::VSHL:
case X86ISD::VSRL:
case X86ISD::VSRA: {
@@ -33433,7 +33449,10 @@ bool X86TargetLowering::SimplifyDemanded
SDValue Insert =
insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits);
return TLO.CombineTo(Op, Insert);
- }
+ }
+ // Arithmetic Ops.
+ case X86ISD::PMULDQ:
+ case X86ISD::PMULUDQ:
// Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::UNPCKL:
@@ -33552,9 +33571,11 @@ bool X86TargetLowering::SimplifyDemanded
SDValue RHS = Op.getOperand(1);
// FIXME: Can we bound this better?
APInt DemandedMask = APInt::getLowBitsSet(64, 32);
- if (SimplifyDemandedBits(LHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp,
+ TLO, Depth + 1))
return true;
- if (SimplifyDemandedBits(RHS, DemandedMask, KnownOp, TLO, Depth + 1))
+ if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp,
+ TLO, Depth + 1))
return true;
break;
}
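
As a reference point for the hunk above: PMULUDQ reads only the low 32 bits of each 64-bit element, which is why only the low 32 demanded bits are pushed into each operand; the change additionally threads OriginalDemandedElts through so known bits are tracked per demanded element. A minimal, hand-written IR pattern of this shape (not part of this commit) that is normally selected to pmuludq:

  ; Masking both operands to their low 32 bits makes the 64-bit multiply
  ; match the PMULUDQ selection pattern.
  define <2 x i64> @pmuludq_pattern(<2 x i64> %a, <2 x i64> %b) {
    %la = and <2 x i64> %a, <i64 4294967295, i64 4294967295>
    %lb = and <2 x i64> %b, <i64 4294967295, i64 4294967295>
    %m  = mul <2 x i64> %la, %lb
    ret <2 x i64> %m
  }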
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll?rev=359686&r1=359685&r2=359686&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-mul-widen.ll Wed May 1 07:50:50 2019
@@ -153,13 +153,13 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -177,13 +177,13 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX512BW-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -192,22 +192,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BWVL-LABEL: test_v4i64:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -345,13 +345,13 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -369,22 +369,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -402,22 +402,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -648,13 +648,13 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -680,22 +680,22 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -721,22 +721,22 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
Modified: llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll?rev=359686&r1=359685&r2=359686&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-reduce-mul.ll Wed May 1 07:50:50 2019
@@ -153,13 +153,13 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -177,13 +177,13 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX512BW-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512BW-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX512BW-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX512BW-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -192,22 +192,22 @@ define i64 @test_v4i64(<4 x i64> %a0) {
; AVX512BWVL-LABEL: test_v4i64:
; AVX512BWVL: # %bb.0:
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm3, %ymm2
-; AVX512BWVL-NEXT: vpsllq $32, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
-; AVX512BWVL-NEXT: vpaddq %ymm2, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm2, %ymm2
-; AVX512BWVL-NEXT: vpsrlq $32, %ymm1, %ymm3
-; AVX512BWVL-NEXT: vpmuludq %ymm3, %ymm0, %ymm3
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -345,13 +345,13 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -369,22 +369,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -402,22 +402,22 @@ define i64 @test_v8i64(<8 x i64> %a0) {
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper
@@ -648,13 +648,13 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpaddq %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-NEXT: vpsrldq {{.*#+}} ymm2 = ymm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: vpmuludq %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpsrlq $32, %ymm0, %ymm3
-; AVX2-NEXT: vpmuludq %ymm1, %ymm3, %ymm3
-; AVX2-NEXT: vpaddq %xmm3, %xmm2, %xmm2
+; AVX2-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX2-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX2-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX2-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX2-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX2-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX2-NEXT: vpmuludq %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: vzeroupper
@@ -680,22 +680,22 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BW-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BW-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BW-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrldq {{.*#+}} xmm3 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
+; AVX512BW-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BW-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BW-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BW-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, %rax
; AVX512BW-NEXT: vzeroupper
@@ -721,22 +721,22 @@ define i64 @test_v16i64(<16 x i64> %a0)
; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm3, %zmm2
-; AVX512BWVL-NEXT: vpsllq $32, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
-; AVX512BWVL-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
+; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm0, %zmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm2, %zmm2
-; AVX512BWVL-NEXT: vpsrlq $32, %zmm1, %zmm3
-; AVX512BWVL-NEXT: vpmuludq %zmm3, %zmm0, %zmm3
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm0, %xmm2
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpsrlq $32, %xmm1, %xmm3
+; AVX512BWVL-NEXT: vpmuludq %xmm3, %xmm0, %xmm3
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm3, %xmm2
; AVX512BWVL-NEXT: vpsllq $32, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpmuludq %zmm1, %zmm0, %zmm0
+; AVX512BWVL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpaddq %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vmovq %xmm0, %rax
; AVX512BWVL-NEXT: vzeroupper