[llvm] r361854 - [DAGCombiner][X86][AArch64] (x - C) + y -> (x + y) - C fold

Roman Lebedev via llvm-commits <llvm-commits at lists.llvm.org>
Tue May 28 10:54:05 PDT 2019


Author: lebedevri
Date: Tue May 28 10:54:04 2019
New Revision: 361854

URL: http://llvm.org/viewvc/llvm-project?rev=361854&view=rev
Log:
[DAGCombiner][X86][AArch64] (x - C) + y  ->  (x + y) - C  fold

Summary:
Only vector tests are affected here, since subtraction
by a scalar constant is already rewritten as addition
of the negated constant.

No surprising test changes.

https://rise4fun.com/Alive/pbT
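
As a concrete sketch, this is the kind of IR the fold targets
(modelled on the vec_sink_sub_of_const_to_add0 test updated below;
the function name here is illustrative). The one-use subtraction by
a vector constant is hoisted past the outer add:

  define <4 x i32> @sink_sub_of_const(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
    %t0 = add <4 x i32> %a, %b
    ; (%t0 - C) + %c  is combined into  (%t0 + %c) - C
    %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
    %r = add <4 x i32> %t1, %c
    ret <4 x i32> %r
  }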

Reviewers: RKSimon, craig.topper, spatel

Reviewed By: RKSimon

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62257

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll
    llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=361854&r1=361853&r2=361854&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue May 28 10:54:04 2019
@@ -2454,6 +2454,14 @@ SDValue DAGCombiner::visitADDLikeCommuta
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
+  // Hoist one-use subtraction by constant:  (x - C) + y  ->  (x + y) - C
+  // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+  if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+      isConstantOrConstantVector(N0.getOperand(1))) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+    return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+  }
+
   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
   // rather than 'add 0/-1' (the zext should get folded).
   // add (sext i1 Y), X --> sub X, (zext i1 Y)
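
For contrast, a scalar sketch (illustrative; not part of the patch) of
why the new combine is only needed for vectors: DAGCombine already
canonicalizes a scalar sub-by-constant into an add of the negated
constant, which the existing reassociation then sinks:

  define i32 @scalar_case(i32 %a, i32 %b) {
    %t0 = sub i32 %a, 42   ; canonicalized to (add %a, -42)
    %r = add i32 %t0, %b   ; the -42 then reassociates past %b
    ret i32 %r
  }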

Modified: llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll?rev=361854&r1=361853&r2=361854&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/sink-addsub-of-const.ll Tue May 28 10:54:04 2019
@@ -218,8 +218,8 @@ define <4 x i32> @vec_sink_sub_of_const_
 ; CHECK-NEXT:    adrp x8, .LCPI14_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI14_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -232,8 +232,8 @@ define <4 x i32> @vec_sink_sub_of_const_
 ; CHECK-NEXT:    adrp x8, .LCPI15_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI15_0]
 ; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,8 +360,8 @@ define <4 x i32> @vec_sink_sub_from_cons
 ; CHECK-NEXT:    adrp x8, .LCPI23_0
 ; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI23_0]
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v3.4s
-; CHECK-NEXT:    add v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0

Modified: llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll?rev=361854&r1=361853&r2=361854&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sink-addsub-of-const.ll Tue May 28 10:54:04 2019
@@ -341,16 +341,16 @@ define <4 x i32> @vec_sink_add_of_const_
 define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add0:
 ; X32:       # %bb.0:
+; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add0:
 ; X64:       # %bb.0:
+; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -360,16 +360,16 @@ define <4 x i32> @vec_sink_sub_of_const_
 define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; X32-LABEL: vec_sink_sub_of_const_to_add1:
 ; X32:       # %bb.0:
+; X32-NEXT:    paddd %xmm2, %xmm1
 ; X32-NEXT:    paddd %xmm1, %xmm0
 ; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    paddd %xmm2, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_of_const_to_add1:
 ; X64:       # %bb.0:
+; X64-NEXT:    paddd %xmm2, %xmm1
 ; X64-NEXT:    paddd %xmm1, %xmm0
 ; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
-; X64-NEXT:    paddd %xmm2, %xmm0
 ; X64-NEXT:    retq
   %t0 = add <4 x i32> %a, %b
   %t1 = sub <4 x i32> %t0, <i32 42, i32 24, i32 undef, i32 46>
@@ -525,15 +525,15 @@ define <4 x i32> @vec_sink_sub_from_cons
 ; X32-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X32:       # %bb.0:
 ; X32-NEXT:    psubd %xmm1, %xmm0
-; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    paddd %xmm2, %xmm0
+; X32-NEXT:    psubd {{\.LCPI.*}}, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: vec_sink_sub_from_const_to_sub2:
 ; X64:       # %bb.0:
 ; X64-NEXT:    psubd %xmm1, %xmm0
-; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    paddd %xmm2, %xmm0
+; X64-NEXT:    psubd {{.*}}(%rip), %xmm0
 ; X64-NEXT:    retq
   %t0 = sub <4 x i32> %a, %b
   %t1 = sub <4 x i32> <i32 42, i32 24, i32 undef, i32 46>, %t0

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll?rev=361854&r1=361853&r2=361854&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll Tue May 28 10:54:04 2019
@@ -186,10 +186,10 @@ define <16 x i8> @test_div7_16i8(<16 x i
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
-; SSE2-NEXT:    psubb %xmm2, %xmm0
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm0, %xmm1
+; SSE2-NEXT:    psubb %xmm2, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
@@ -210,10 +210,10 @@ define <16 x i8> @test_div7_16i8(<16 x i
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm0
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm2, %xmm0
-; SSE41-NEXT:    psubb %xmm2, %xmm0
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm0, %xmm1
+; SSE41-NEXT:    psubb %xmm2, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm0
 ; SSE41-NEXT:    retq
 ;
@@ -233,10 +233,10 @@ define <16 x i8> @test_div7_16i8(<16 x i
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2NOBW-LABEL: test_div7_16i8:
@@ -251,10 +251,10 @@ define <16 x i8> @test_div7_16i8(<16 x i
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX2NOBW-NEXT:    vzeroupper
 ; AVX2NOBW-NEXT:    retq
 ;
@@ -269,10 +269,10 @@ define <16 x i8> @test_div7_16i8(<16 x i
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX512BW-NEXT:    vpsubb %xmm2, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
   %res = sdiv <16 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
@@ -657,10 +657,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE2-NEXT:    pxor %xmm3, %xmm2
-; SSE2-NEXT:    psubb %xmm3, %xmm2
 ; SSE2-NEXT:    psrlw $7, %xmm1
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE2-NEXT:    paddb %xmm2, %xmm1
+; SSE2-NEXT:    psubb %xmm3, %xmm1
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psllw $3, %xmm2
 ; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -685,10 +685,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
 ; SSE41-NEXT:    movdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; SSE41-NEXT:    pxor %xmm3, %xmm2
-; SSE41-NEXT:    psubb %xmm3, %xmm2
 ; SSE41-NEXT:    psrlw $7, %xmm1
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm1
 ; SSE41-NEXT:    paddb %xmm2, %xmm1
+; SSE41-NEXT:    psubb %xmm3, %xmm1
 ; SSE41-NEXT:    movdqa %xmm1, %xmm2
 ; SSE41-NEXT:    psllw $3, %xmm2
 ; SSE41-NEXT:    pand {{.*}}(%rip), %xmm2
@@ -712,10 +712,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX1-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX1-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX1-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX1-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -734,10 +734,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX2NOBW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX2NOBW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1
@@ -756,10 +756,10 @@ define <16 x i8> @test_rem7_16i8(<16 x i
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} xmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %xmm3, %xmm2, %xmm2
-; AVX512BW-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpaddb %xmm1, %xmm2, %xmm1
+; AVX512BW-NEXT:    vpsubb %xmm3, %xmm1, %xmm1
 ; AVX512BW-NEXT:    vpsllw $3, %xmm1, %xmm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm2, %xmm2
 ; AVX512BW-NEXT:    vpsubb %xmm2, %xmm1, %xmm1

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll?rev=361854&r1=361853&r2=361854&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-256.ll Tue May 28 10:54:04 2019
@@ -177,8 +177,8 @@ define <32 x i8> @test_div7_32i8(<32 x i
 ; AVX1-NEXT:    vpand %xmm5, %xmm1, %xmm1
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm6, %xmm1, %xmm1
-; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT:    vpsubb %xmm6, %xmm1, %xmm1
 ; AVX1-NEXT:    vpmovsxbw %xmm0, %xmm2
 ; AVX1-NEXT:    vpmullw %xmm3, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsrlw $8, %xmm2, %xmm2
@@ -193,8 +193,8 @@ define <32 x i8> @test_div7_32i8(<32 x i
 ; AVX1-NEXT:    vpsrlw $2, %xmm0, %xmm0
 ; AVX1-NEXT:    vpand %xmm5, %xmm0, %xmm0
 ; AVX1-NEXT:    vpxor %xmm6, %xmm0, %xmm0
-; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpsubb %xmm6, %xmm0, %xmm0
 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
@@ -215,10 +215,10 @@ define <32 x i8> @test_div7_32i8(<32 x i
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX2NOBW-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_32i8:
@@ -232,10 +232,10 @@ define <32 x i8> @test_div7_32i8(<32 x i
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm2, %ymm1, %ymm1
-; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512BW-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512BW-NEXT:    vpsubb %ymm2, %ymm0, %ymm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <32 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <32 x i8> %res
@@ -588,8 +588,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm4, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm4
 ; AVX1-NEXT:    vmovdqa {{.*#+}} xmm5 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX1-NEXT:    vpand %xmm5, %xmm4, %xmm4
@@ -609,8 +609,8 @@ define <32 x i8> @test_rem7_32i8(<32 x i
 ; AVX1-NEXT:    vpsrlw $2, %xmm2, %xmm2
 ; AVX1-NEXT:    vpand %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vpxor %xmm7, %xmm2, %xmm2
-; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpaddb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT:    vpsubb %xmm7, %xmm2, %xmm2
 ; AVX1-NEXT:    vpsllw $3, %xmm2, %xmm3
 ; AVX1-NEXT:    vpand %xmm5, %xmm3, %xmm3
 ; AVX1-NEXT:    vpsubb %xmm3, %xmm2, %xmm2
@@ -635,10 +635,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX2NOBW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
-; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX2NOBW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX2NOBW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX2NOBW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX2NOBW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1
@@ -656,10 +656,10 @@ define <32 x i8> @test_rem7_32i8(<32 x i
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vmovdqa {{.*#+}} ymm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxor %ymm3, %ymm2, %ymm2
-; AVX512BW-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsrlw $7, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512BW-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
 ; AVX512BW-NEXT:    vpsllw $3, %ymm1, %ymm2
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512BW-NEXT:    vpsubb %ymm2, %ymm1, %ymm1

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll?rev=361854&r1=361853&r2=361854&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-512.ll Tue May 28 10:54:04 2019
@@ -146,8 +146,8 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512F-NEXT:    vpand %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm6 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm0, %ymm0
-; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpsubb %ymm6, %ymm0, %ymm0
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpmovsxbw %xmm2, %ymm2
 ; AVX512F-NEXT:    vpmullw %ymm3, %ymm2, %ymm2
@@ -163,8 +163,8 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512F-NEXT:    vpsrlw $2, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpand %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpxor %ymm6, %ymm1, %ymm1
-; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vpsubb %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_div7_64i8:
@@ -185,10 +185,10 @@ define <64 x i8> @test_div7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
-; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsubb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
   %res = sdiv <64 x i8> %a, <i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,i8 7, i8 7, i8 7, i8 7>
   ret <64 x i8> %res
@@ -486,8 +486,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm7 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm4, %ymm2, %ymm2
+; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm4
 ; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm8 = [248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248,248]
 ; AVX512F-NEXT:    vpand %ymm8, %ymm4, %ymm4
@@ -508,8 +508,8 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512F-NEXT:    vpsrlw $2, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpand %ymm6, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpxor %ymm7, %ymm2, %ymm2
-; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpaddb %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT:    vpsubb %ymm7, %ymm2, %ymm2
 ; AVX512F-NEXT:    vpsllw $3, %ymm2, %ymm3
 ; AVX512F-NEXT:    vpand %ymm8, %ymm3, %ymm3
 ; AVX512F-NEXT:    vpsubb %ymm3, %ymm2, %ymm2
@@ -534,10 +534,10 @@ define <64 x i8> @test_rem7_64i8(<64 x i
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32]
 ; AVX512BW-NEXT:    vpxorq %zmm3, %zmm2, %zmm2
-; AVX512BW-NEXT:    vpsubb %zmm3, %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsrlw $7, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm2, %zmm1
+; AVX512BW-NEXT:    vpsubb %zmm3, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsllw $3, %zmm1, %zmm2
 ; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512BW-NEXT:    vpsubb %zmm2, %zmm1, %zmm1



