[llvm] r369459 - [DAGCombiner][X86] Teach visitCONCAT_VECTORS to combine (concat_vectors (concat_vectors X, Y), undef) -> (concat_vectors X, Y, undef, undef)
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 20 15:12:50 PDT 2019
Author: ctopper
Date: Tue Aug 20 15:12:50 2019
New Revision: 369459
URL: http://llvm.org/viewvc/llvm-project?rev=369459&view=rev
Log:
[DAGCombiner][X86] Teach visitCONCAT_VECTORS to combine (concat_vectors (concat_vectors X, Y), undef) -> (concat_vectors X, Y, undef, undef)
I also had to add a new combine to X86's combineExtractSubvector to prevent a regression.
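In DAG terms, that X86 combine rewrites an extract of an insert into a zero vector (the value names and types here are illustrative, not taken from the patch):

  (v8i32 extract_subvector (insert_subvector (v16i32 zeros), X:v4i32, 0), 0)
    --> (insert_subvector (v8i32 zeros), X:v4i32, 0)

It only fires for a single-use insert at index 0 that is extracted at index 0, when the result type is at least as wide as the inserted subvector (so no inserted bits are dropped) and is not a vXi1 type.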
Flattening the concat this way helps our vXi1 code see the full concat operation and allows it to optimize an undef to a zero if there is already a zero in the concat. That let us use a movzx instead of an AND in some of the tests. In those tests, one concat comes from SelectionDAGBuilder and the second comes from type legalization of v4i1->i4 bitcasts, which uses an additional concat.
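As a sketch, the flattening and the vXi1 payoff look like this in DAG form (hypothetical values and types):

  t0: v2i1 = concat_vectors Cmp, zero
  t1: v4i1 = concat_vectors t0, undef
    --> t1: v4i1 = concat_vectors Cmp, zero, undef, undef

With the flat form, the vXi1 lowering can see the existing zero operand and choose zero for the undef operands as well, so the upper bits are already zero and masking like the 'andl $3, %eax' removed in the tests below is no longer needed.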
These test changes weren't my original motivation, though. I'm looking at making X86ISelLowering's narrowShuffle emit a concat_vectors instead of an insert_subvector, since concat_vectors is more canonical during early DAG combine. This patch helps prevent a regression from my experiments with that.
Differential Revision: https://reviews.llvm.org/D66456
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
llvm/trunk/test/CodeGen/X86/oddshuffles.ll
llvm/trunk/test/CodeGen/X86/vec_saddo.ll
llvm/trunk/test/CodeGen/X86/vec_smulo.ll
llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
llvm/trunk/test/CodeGen/X86/vec_uaddo.ll
llvm/trunk/test/CodeGen/X86/vec_umulo.ll
llvm/trunk/test/CodeGen/X86/vec_usubo.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Aug 20 15:12:50 2019
@@ -17687,6 +17687,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
+ // If the input is a concat_vectors, just make a larger concat by padding
+ // with smaller undefs.
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+ SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+ Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
SDValue Scalar = peekThroughOneUseBitcasts(In);
// concat_vectors(scalar_to_vector(scalar), undef) ->
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug 20 15:12:50 2019
@@ -44504,6 +44504,20 @@ static SDValue combineExtractSubvector(S
}
}
+ // If we are extracting from an insert into a zero vector, replace with a
+ // smaller insert into zero if we don't access less than the original
+ // subvector. Don't do this for i1 vectors.
+ if (VT.getVectorElementType() != MVT::i1 &&
+ InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 &&
+ InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) &&
+ ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) &&
+ InVec.getOperand(1).getValueSizeInBits() <= VT.getSizeInBits()) {
+ SDLoc DL(N);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ getZeroVector(VT, Subtarget, DAG, DL),
+ InVec.getOperand(1), InVec.getOperand(2));
+ }
+
// If we're extracting from a broadcast then we're better off just
// broadcasting to the smaller type directly, assuming this is the only use.
// As its a broadcast we don't care about the extraction index.
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Tue Aug 20 15:12:50 2019
@@ -2684,7 +2684,6 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2711,7 +2710,6 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2741,7 +2739,6 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2773,7 +2770,6 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2805,7 +2801,6 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2836,7 +2831,6 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7526,7 +7520,6 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7553,7 +7546,6 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7583,7 +7575,6 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7615,7 +7606,6 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7647,7 +7637,6 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7678,7 +7667,6 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12428,7 +12416,6 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12455,7 +12442,6 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12485,7 +12471,6 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12517,7 +12502,6 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12549,7 +12533,6 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12580,7 +12563,6 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17350,7 +17332,6 @@ define zeroext i4 @test_vpcmpultq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17377,7 +17358,6 @@ define zeroext i4 @test_vpcmpultq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17407,7 +17387,6 @@ define zeroext i4 @test_masked_vpcmpultq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17439,7 +17418,6 @@ define zeroext i4 @test_masked_vpcmpultq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17471,7 +17449,6 @@ define zeroext i4 @test_vpcmpultq_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17502,7 +17479,6 @@ define zeroext i4 @test_masked_vpcmpultq
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21219,7 +21195,6 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21246,7 +21221,6 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21274,7 +21248,6 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21305,7 +21278,6 @@ define zeroext i4 @test_masked_vcmpoeqpd
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21336,7 +21308,6 @@ define zeroext i4 @test_masked_vcmpoeqpd
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21368,7 +21339,6 @@ define zeroext i4 @test_masked_vcmpoeqpd
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: andl $3, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
Modified: llvm/trunk/test/CodeGen/X86/oddshuffles.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/oddshuffles.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/oddshuffles.ll (original)
+++ llvm/trunk/test/CodeGen/X86/oddshuffles.ll Tue Aug 20 15:12:50 2019
@@ -1513,34 +1513,34 @@ define void @interleave_24i32_in(<24 x i
; AVX1-LABEL: interleave_24i32_in:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovupd (%rsi), %ymm0
-; AVX1-NEXT: vmovups 16(%rcx), %xmm1
-; AVX1-NEXT: vmovups (%rdx), %xmm2
-; AVX1-NEXT: vmovups 16(%rdx), %xmm3
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm3[3,0],xmm1[3,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,1],xmm4[0,2]
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,0],xmm3[1,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[2,2]
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = ymm0[1,1,3,3]
-; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,2,3]
-; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2],ymm1[3,4],ymm3[5],ymm1[6,7]
+; AVX1-NEXT: vmovups (%rdx), %xmm1
+; AVX1-NEXT: vmovups 16(%rdx), %xmm2
; AVX1-NEXT: vmovups (%rsi), %xmm3
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm3[2,0],xmm2[2,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[1,1],xmm4[0,2]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm3[0,0]
-; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm3[2,1]
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm3[2,0],xmm1[2,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm1[1,1],xmm4[0,2]
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm3[0,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[2,1]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm1, %ymm1
; AVX1-NEXT: vpermilps {{.*#+}} xmm3 = mem[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm3, %ymm3
+; AVX1-NEXT: vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2],ymm1[3,4],ymm3[5],ymm1[6,7]
+; AVX1-NEXT: vmovups 16(%rcx), %xmm3
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm2[3,0],xmm3[3,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm4 = xmm3[2,1],xmm4[0,2]
+; AVX1-NEXT: vshufps {{.*#+}} xmm3 = xmm3[1,0],xmm2[1,0]
+; AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm3[2,0],xmm2[2,2]
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm2, %ymm2
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = ymm0[1,1,3,3]
+; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm3[2,3,2,3]
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm3[2],ymm2[3,4],ymm3[5],ymm2[6,7]
-; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm3 = mem[1,1,2,2]
+; AVX1-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm3[0],ymm0[1],ymm3[2,3],ymm0[4],ymm3[5,6],ymm0[7]
; AVX1-NEXT: vpermilps {{.*#+}} ymm3 = mem[0,0,3,3,4,4,7,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm3[2],ymm0[3,4],ymm3[5],ymm0[6,7]
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
-; AVX1-NEXT: vmovups %ymm2, (%rdi)
-; AVX1-NEXT: vmovups %ymm1, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm2, 64(%rdi)
+; AVX1-NEXT: vmovups %ymm1, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1549,17 +1549,17 @@ define void @interleave_24i32_in(<24 x i
; AVX2-SLOW-NEXT: vmovups (%rsi), %ymm0
; AVX2-SLOW-NEXT: vmovups (%rdx), %ymm1
; AVX2-SLOW-NEXT: vmovups (%rcx), %ymm2
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm3 = ymm2[2,1,3,3]
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[1,2,3,3,5,6,7,7]
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[2,2,2,3]
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm3 = mem[1,0,2,2]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm3 = ymm3[0,1,0,1]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,0,2,1]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7]
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,3,3,3]
+; AVX2-SLOW-NEXT: vbroadcastsd (%rcx), %ymm4
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
-; AVX2-SLOW-NEXT: vpermilps {{.*#+}} xmm4 = mem[1,0,2,2]
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm4[0,1,0,1]
-; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,0,2,1]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[2,1,3,3]
+; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm5 = ymm1[1,2,3,3,5,6,7,7]
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm5[2,2,2,3]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm5[0],ymm4[1],ymm5[2,3],ymm4[4],ymm5[5,6],ymm4[7]
-; AVX2-SLOW-NEXT: vbroadcastsd (%rcx), %ymm5
+; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,3,3,3]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-SLOW-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2]
@@ -1567,8 +1567,8 @@ define void @interleave_24i32_in(<24 x i
; AVX2-SLOW-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; AVX2-SLOW-NEXT: vmovups %ymm0, 32(%rdi)
-; AVX2-SLOW-NEXT: vmovups %ymm4, (%rdi)
-; AVX2-SLOW-NEXT: vmovups %ymm3, 64(%rdi)
+; AVX2-SLOW-NEXT: vmovups %ymm4, 64(%rdi)
+; AVX2-SLOW-NEXT: vmovups %ymm3, (%rdi)
; AVX2-SLOW-NEXT: vzeroupper
; AVX2-SLOW-NEXT: retq
;
@@ -1577,27 +1577,27 @@ define void @interleave_24i32_in(<24 x i
; AVX2-FAST-NEXT: vmovups (%rsi), %ymm0
; AVX2-FAST-NEXT: vmovups (%rdx), %ymm1
; AVX2-FAST-NEXT: vmovups (%rcx), %ymm2
-; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm3 = [5,6,5,6,5,6,7,7]
+; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm3 = [1,0,2,2,1,0,2,2]
+; AVX2-FAST-NEXT: # ymm3 = mem[0,1,0,1]
; AVX2-FAST-NEXT: vpermps %ymm1, %ymm3, %ymm3
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm2[2,1,3,3]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0],ymm4[1],ymm3[2,3],ymm4[4],ymm3[5,6],ymm4[7]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,3,3,3]
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[0,0,2,1]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7]
+; AVX2-FAST-NEXT: vbroadcastsd (%rcx), %ymm4
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm4 = ymm0[1,1,2,2]
+; AVX2-FAST-NEXT: vmovaps {{.*#+}} ymm4 = [5,6,5,6,5,6,7,7]
+; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm4
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm2[2,1,3,3]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0],ymm5[1],ymm4[2,3],ymm5[4],ymm4[5,6],ymm5[7]
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm5 = ymm0[0,3,3,3]
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm4 = ymm4[0,1],ymm5[2],ymm4[3,4],ymm5[5],ymm4[6,7]
+; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[1,1,2,2]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0],ymm4[1],ymm2[2,3],ymm4[4],ymm2[5,6],ymm4[7]
-; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm4 = ymm1[0,0,3,3,4,4,7,7]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
-; AVX2-FAST-NEXT: vbroadcastf128 {{.*#+}} ymm4 = [1,0,2,2,1,0,2,2]
-; AVX2-FAST-NEXT: # ymm4 = mem[0,1,0,1]
-; AVX2-FAST-NEXT: vpermps %ymm1, %ymm4, %ymm1
-; AVX2-FAST-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,1]
-; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3],ymm1[4],ymm0[5,6],ymm1[7]
-; AVX2-FAST-NEXT: vbroadcastsd (%rcx), %ymm1
+; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3],ymm0[4],ymm2[5,6],ymm0[7]
+; AVX2-FAST-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,0,3,3,4,4,7,7]
; AVX2-FAST-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
-; AVX2-FAST-NEXT: vmovups %ymm0, (%rdi)
-; AVX2-FAST-NEXT: vmovups %ymm2, 32(%rdi)
-; AVX2-FAST-NEXT: vmovups %ymm3, 64(%rdi)
+; AVX2-FAST-NEXT: vmovups %ymm0, 32(%rdi)
+; AVX2-FAST-NEXT: vmovups %ymm4, 64(%rdi)
+; AVX2-FAST-NEXT: vmovups %ymm3, (%rdi)
; AVX2-FAST-NEXT: vzeroupper
; AVX2-FAST-NEXT: retq
;
@@ -1605,32 +1605,32 @@ define void @interleave_24i32_in(<24 x i
; XOP: # %bb.0:
; XOP-NEXT: vmovupd (%rsi), %ymm0
; XOP-NEXT: vmovups (%rcx), %ymm1
-; XOP-NEXT: vmovups 16(%rcx), %xmm2
-; XOP-NEXT: vmovups (%rdx), %xmm3
-; XOP-NEXT: vmovups 16(%rdx), %xmm4
-; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[3,0],xmm2[3,0]
-; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm2[2,1],xmm5[0,2]
-; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[1,0],xmm4[1,0]
-; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm4[2,2]
-; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
-; XOP-NEXT: vpermilpd {{.*#+}} ymm4 = ymm0[1,1,3,3]
-; XOP-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm4[2,3,2,3]
-; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
+; XOP-NEXT: vmovups (%rdx), %xmm2
+; XOP-NEXT: vmovups 16(%rdx), %xmm3
; XOP-NEXT: vmovups (%rsi), %xmm4
-; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,0],xmm3[2,0]
-; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm3[1,1],xmm5[0,2]
-; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm3[0,0],xmm4[0,0]
-; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm3[2,0],xmm4[2,1]
-; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
+; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,0],xmm2[2,0]
+; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm2[1,1],xmm5[0,2]
+; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[0,0],xmm4[0,0]
+; XOP-NEXT: vshufps {{.*#+}} xmm2 = xmm2[2,0],xmm4[2,1]
+; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm2, %ymm2
; XOP-NEXT: vpermilps {{.*#+}} xmm4 = mem[0,1,0,1]
; XOP-NEXT: vinsertf128 $1, %xmm4, %ymm4, %ymm4
+; XOP-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1],ymm4[2],ymm2[3,4],ymm4[5],ymm2[6,7]
+; XOP-NEXT: vmovups 16(%rcx), %xmm4
+; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm3[3,0],xmm4[3,0]
+; XOP-NEXT: vshufps {{.*#+}} xmm5 = xmm4[2,1],xmm5[0,2]
+; XOP-NEXT: vshufps {{.*#+}} xmm4 = xmm4[1,0],xmm3[1,0]
+; XOP-NEXT: vshufps {{.*#+}} xmm3 = xmm4[2,0],xmm3[2,2]
+; XOP-NEXT: vinsertf128 $1, %xmm5, %ymm3, %ymm3
+; XOP-NEXT: vpermilpd {{.*#+}} ymm4 = ymm0[1,1,3,3]
+; XOP-NEXT: vperm2f128 {{.*#+}} ymm4 = ymm4[2,3,2,3]
; XOP-NEXT: vblendps {{.*#+}} ymm3 = ymm3[0,1],ymm4[2],ymm3[3,4],ymm4[5],ymm3[6,7]
; XOP-NEXT: vpermil2ps {{.*#+}} ymm0 = ymm1[2],ymm0[3],ymm1[2,3],ymm0[4],ymm1[5,4],ymm0[5]
; XOP-NEXT: vpermilps {{.*#+}} ymm1 = mem[0,0,3,3,4,4,7,7]
; XOP-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3,4],ymm1[5],ymm0[6,7]
; XOP-NEXT: vmovups %ymm0, 32(%rdi)
-; XOP-NEXT: vmovups %ymm3, (%rdi)
-; XOP-NEXT: vmovups %ymm2, 64(%rdi)
+; XOP-NEXT: vmovups %ymm3, 64(%rdi)
+; XOP-NEXT: vmovups %ymm2, (%rdi)
; XOP-NEXT: vzeroupper
; XOP-NEXT: retq
%s1 = load <8 x i32>, <8 x i32>* %q1, align 4
Modified: llvm/trunk/test/CodeGen/X86/vec_saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_saddo.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_saddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_saddo.ll Tue Aug 20 15:12:50 2019
@@ -1791,48 +1791,48 @@ define <2 x i32> @saddo_v2i128(<2 x i128
;
; AVX512-LABEL: saddo_v2i128:
; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %r14
-; AVX512-NEXT: adcq %r11, %r14
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: testq %r11, %r11
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: andb %bl, %al
-; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: kshiftlw $1, %k0, %k0
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: sete %al
+; AVX512-NEXT: setns %bl
+; AVX512-NEXT: cmpb %al, %bl
+; AVX512-NEXT: sete %bpl
; AVX512-NEXT: addq %r8, %rdi
; AVX512-NEXT: adcq %r9, %rsi
+; AVX512-NEXT: setns %al
+; AVX512-NEXT: cmpb %al, %bl
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: andb %bpl, %al
+; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
+; AVX512-NEXT: movq %rcx, %rbp
+; AVX512-NEXT: adcq %r10, %rbp
+; AVX512-NEXT: setns %bl
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: setns %cl
+; AVX512-NEXT: cmpb %bl, %cl
+; AVX512-NEXT: setne %r8b
+; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: andl $1, %ecx
-; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: sete %cl
+; AVX512-NEXT: andb %r8b, %cl
+; AVX512-NEXT: kmovd %ecx, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %rdx, 16(%r10)
-; AVX512-NEXT: movq %rdi, (%r10)
-; AVX512-NEXT: movq %r14, 24(%r10)
-; AVX512-NEXT: movq %rsi, 8(%r10)
+; AVX512-NEXT: movq %rdx, 16(%r11)
+; AVX512-NEXT: movq %rdi, (%r11)
+; AVX512-NEXT: movq %rbp, 24(%r11)
+; AVX512-NEXT: movq %rsi, 8(%r11)
; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
Modified: llvm/trunk/test/CodeGen/X86/vec_smulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_smulo.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_smulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_smulo.ll Tue Aug 20 15:12:50 2019
@@ -2605,9 +2605,9 @@ define <2 x i32> @smulo_v2i128(<2 x i128
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
; AVX512-NEXT: kmovd %ecx, %k0
-; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
Modified: llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ssubo.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ssubo.ll Tue Aug 20 15:12:50 2019
@@ -1830,48 +1830,48 @@ define <2 x i32> @ssubo_v2i128(<2 x i128
;
; AVX512-LABEL: ssubo_v2i128:
; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
-; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %r14
-; AVX512-NEXT: sbbq %r11, %r14
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: testq %r11, %r11
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: andb %bl, %al
-; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: kshiftlw $1, %k0, %k0
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: testq %r9, %r9
; AVX512-NEXT: setns %al
; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %cl
-; AVX512-NEXT: cmpb %al, %cl
-; AVX512-NEXT: setne %al
+; AVX512-NEXT: setns %bl
+; AVX512-NEXT: cmpb %al, %bl
+; AVX512-NEXT: setne %bpl
; AVX512-NEXT: subq %r8, %rdi
; AVX512-NEXT: sbbq %r9, %rsi
+; AVX512-NEXT: setns %al
+; AVX512-NEXT: cmpb %al, %bl
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: andb %bpl, %al
+; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
+; AVX512-NEXT: movq %rcx, %rbp
+; AVX512-NEXT: sbbq %r10, %rbp
+; AVX512-NEXT: setns %bl
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: setns %cl
+; AVX512-NEXT: cmpb %bl, %cl
+; AVX512-NEXT: setne %r8b
+; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
-; AVX512-NEXT: andl $1, %ecx
-; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: andb %r8b, %cl
+; AVX512-NEXT: kmovd %ecx, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %rdx, 16(%r10)
-; AVX512-NEXT: movq %rdi, (%r10)
-; AVX512-NEXT: movq %r14, 24(%r10)
-; AVX512-NEXT: movq %rsi, 8(%r10)
+; AVX512-NEXT: movq %rdx, 16(%r11)
+; AVX512-NEXT: movq %rdi, (%r11)
+; AVX512-NEXT: movq %rbp, 24(%r11)
+; AVX512-NEXT: movq %rsi, 8(%r11)
; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %r14
+; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
Modified: llvm/trunk/test/CodeGen/X86/vec_uaddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_uaddo.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_uaddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_uaddo.ll Tue Aug 20 15:12:50 2019
@@ -1282,16 +1282,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128
; AVX512-LABEL: uaddo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: addq %r8, %rdi
+; AVX512-NEXT: adcq %r9, %rsi
+; AVX512-NEXT: setb %r8b
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: addq %r8, %rdi
-; AVX512-NEXT: adcq %r9, %rsi
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: andl $1, %eax
-; AVX512-NEXT: kmovw %eax, %k1
+; AVX512-NEXT: andl $1, %r8d
+; AVX512-NEXT: kmovw %r8d, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
Modified: llvm/trunk/test/CodeGen/X86/vec_umulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_umulo.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_umulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_umulo.ll Tue Aug 20 15:12:50 2019
@@ -2451,66 +2451,68 @@ define <2 x i32> @umulo_v2i128(<2 x i128
; AVX512-NEXT: pushq %r13
; AVX512-NEXT: pushq %r12
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: movq %rcx, %rax
-; AVX512-NEXT: movq %rdx, %r12
-; AVX512-NEXT: movq %rdi, %r11
+; AVX512-NEXT: movq %r9, %r10
+; AVX512-NEXT: movq %rcx, %r9
+; AVX512-NEXT: movq %rdx, %r11
+; AVX512-NEXT: movq %rsi, %rax
+; AVX512-NEXT: movq %rdi, %rsi
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setne %dl
-; AVX512-NEXT: testq %rcx, %rcx
-; AVX512-NEXT: setne %r13b
-; AVX512-NEXT: andb %dl, %r13b
-; AVX512-NEXT: mulq %r15
-; AVX512-NEXT: movq %rax, %rdi
+; AVX512-NEXT: testq %rax, %rax
+; AVX512-NEXT: setne %bl
+; AVX512-NEXT: andb %dl, %bl
+; AVX512-NEXT: mulq %r8
+; AVX512-NEXT: movq %rax, %r13
; AVX512-NEXT: seto %bpl
; AVX512-NEXT: movq %r10, %rax
-; AVX512-NEXT: mulq %r12
-; AVX512-NEXT: movq %rax, %rbx
+; AVX512-NEXT: mulq %rdi
+; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: seto %cl
; AVX512-NEXT: orb %bpl, %cl
-; AVX512-NEXT: addq %rdi, %rbx
-; AVX512-NEXT: movq %r12, %rax
-; AVX512-NEXT: mulq %r15
-; AVX512-NEXT: movq %rax, %r10
-; AVX512-NEXT: movq %rdx, %r15
-; AVX512-NEXT: addq %rbx, %r15
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: orb %cl, %al
-; AVX512-NEXT: orb %r13b, %al
-; AVX512-NEXT: kmovd %eax, %k0
-; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %al, %cl
+; AVX512-NEXT: addq %r13, %rdi
; AVX512-NEXT: movq %rsi, %rax
; AVX512-NEXT: mulq %r8
-; AVX512-NEXT: movq %rax, %rsi
-; AVX512-NEXT: seto %bpl
+; AVX512-NEXT: movq %rax, %r8
+; AVX512-NEXT: movq %rdx, %r10
+; AVX512-NEXT: addq %rdi, %r10
+; AVX512-NEXT: setb %sil
+; AVX512-NEXT: orb %cl, %sil
+; AVX512-NEXT: orb %bl, %sil
+; AVX512-NEXT: testq %r12, %r12
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: setne %bpl
+; AVX512-NEXT: andb %al, %bpl
; AVX512-NEXT: movq %r9, %rax
-; AVX512-NEXT: mulq %r11
+; AVX512-NEXT: mulq %r15
; AVX512-NEXT: movq %rax, %rdi
-; AVX512-NEXT: seto %bl
-; AVX512-NEXT: orb %bpl, %bl
-; AVX512-NEXT: addq %rsi, %rdi
+; AVX512-NEXT: seto %r9b
+; AVX512-NEXT: movq %r12, %rax
+; AVX512-NEXT: mulq %r11
+; AVX512-NEXT: movq %rax, %rbx
+; AVX512-NEXT: seto %cl
+; AVX512-NEXT: orb %r9b, %cl
+; AVX512-NEXT: addq %rdi, %rbx
; AVX512-NEXT: movq %r11, %rax
-; AVX512-NEXT: mulq %r8
-; AVX512-NEXT: addq %rdi, %rdx
-; AVX512-NEXT: setb %sil
-; AVX512-NEXT: orb %bl, %sil
-; AVX512-NEXT: orb %cl, %sil
+; AVX512-NEXT: mulq %r15
+; AVX512-NEXT: addq %rbx, %rdx
+; AVX512-NEXT: setb %dil
+; AVX512-NEXT: orb %cl, %dil
+; AVX512-NEXT: orb %bpl, %dil
+; AVX512-NEXT: kmovd %edi, %k0
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %esi
; AVX512-NEXT: kmovw %esi, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %r10, 16(%r14)
-; AVX512-NEXT: movq %rax, (%r14)
-; AVX512-NEXT: movq %r15, 24(%r14)
-; AVX512-NEXT: movq %rdx, 8(%r14)
+; AVX512-NEXT: movq %rax, 16(%r14)
+; AVX512-NEXT: movq %r8, (%r14)
+; AVX512-NEXT: movq %rdx, 24(%r14)
+; AVX512-NEXT: movq %r10, 8(%r14)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
; AVX512-NEXT: popq %r13
Modified: llvm/trunk/test/CodeGen/X86/vec_usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_usubo.ll?rev=369459&r1=369458&r2=369459&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_usubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_usubo.ll Tue Aug 20 15:12:50 2019
@@ -1329,16 +1329,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128
; AVX512-LABEL: usubo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
+; AVX512-NEXT: subq %r8, %rdi
+; AVX512-NEXT: sbbq %r9, %rsi
+; AVX512-NEXT: setb %r8b
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
; AVX512-NEXT: kmovd %eax, %k0
; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: subq %r8, %rdi
-; AVX512-NEXT: sbbq %r9, %rsi
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: andl $1, %eax
-; AVX512-NEXT: kmovw %eax, %k1
+; AVX512-NEXT: andl $1, %r8d
+; AVX512-NEXT: kmovw %r8d, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}