[llvm] 7874db7 - [NFC][Codegen] Add `x u% C1 == C2` with C1 u> C2 non-tautological tests

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 10 08:46:20 PST 2019


Author: Roman Lebedev
Date: 2019-11-10T19:44:46+03:00
New Revision: 7874db75ef41f5c387f83e1d0bd16607721ddf89

URL: https://github.com/llvm/llvm-project/commit/7874db75ef41f5c387f83e1d0bd16607721ddf89
DIFF: https://github.com/llvm/llvm-project/commit/7874db75ef41f5c387f83e1d0bd16607721ddf89.diff

LOG: [NFC][Codegen] Add `x u% C1 == C2` with C1 u> C2 non-tautological tests

Added: 
    llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
    llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
    llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
    llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll

Modified: 
    

Removed: 
    


################################################################################
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
new file mode 100644
index 000000000000..6fe10ce5d5b3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define i1 @t32_3_1(i32 %X) nounwind {
+; CHECK-LABEL: t32_3_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #33
+; CHECK-NEXT:    add w8, w8, w8, lsl #1
+; CHECK-NEXT:    sub w8, w0, w8
+; CHECK-NEXT:    cmp w8, #1 // =1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 3
+  %cmp = icmp eq i32 %urem, 1
+  ret i1 %cmp
+}
+
+define i1 @t32_3_2(i32 %X) nounwind {
+; CHECK-LABEL: t32_3_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #33
+; CHECK-NEXT:    add w8, w8, w8, lsl #1
+; CHECK-NEXT:    sub w8, w0, w8
+; CHECK-NEXT:    cmp w8, #2 // =2
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 3
+  %cmp = icmp eq i32 %urem, 2
+  ret i1 %cmp
+}
+
+
+define i1 @t32_5_1(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    movk w8, #52428, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    add w8, w8, w8, lsl #2
+; CHECK-NEXT:    sub w8, w0, w8
+; CHECK-NEXT:    cmp w8, #1 // =1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 1
+  ret i1 %cmp
+}
+
+define i1 @t32_5_2(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    movk w8, #52428, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    add w8, w8, w8, lsl #2
+; CHECK-NEXT:    sub w8, w0, w8
+; CHECK-NEXT:    cmp w8, #2 // =2
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t32_5_3(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    movk w8, #52428, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    add w8, w8, w8, lsl #2
+; CHECK-NEXT:    sub w8, w0, w8
+; CHECK-NEXT:    cmp w8, #3 // =3
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 3
+  ret i1 %cmp
+}
+
+define i1 @t32_5_4(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    movk w8, #52428, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    add w8, w8, w8, lsl #2
+; CHECK-NEXT:    sub w8, w0, w8
+; CHECK-NEXT:    cmp w8, #4 // =4
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 4
+  ret i1 %cmp
+}
+
+
+define i1 @t32_6_1(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    mov w9, #6
+; CHECK-NEXT:    msub w8, w8, w9, w0
+; CHECK-NEXT:    cmp w8, #1 // =1
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 1
+  ret i1 %cmp
+}
+
+define i1 @t32_6_2(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    mov w9, #6
+; CHECK-NEXT:    msub w8, w8, w9, w0
+; CHECK-NEXT:    cmp w8, #2 // =2
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t32_6_3(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    mov w9, #6
+; CHECK-NEXT:    msub w8, w8, w9, w0
+; CHECK-NEXT:    cmp w8, #3 // =3
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 3
+  ret i1 %cmp
+}
+
+define i1 @t32_6_4(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    mov w9, #6
+; CHECK-NEXT:    msub w8, w8, w9, w0
+; CHECK-NEXT:    cmp w8, #4 // =4
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 4
+  ret i1 %cmp
+}
+
+define i1 @t32_6_5(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    umull x8, w0, w8
+; CHECK-NEXT:    lsr x8, x8, #34
+; CHECK-NEXT:    mov w9, #6
+; CHECK-NEXT:    msub w8, w8, w9, w0
+; CHECK-NEXT:    cmp w8, #5 // =5
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 5
+  ret i1 %cmp
+}
+
+;-------------------------------------------------------------------------------
+; Other widths.
+
+define i1 @t16_3_2(i16 %X) nounwind {
+; CHECK-LABEL: t16_3_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w9, #43691
+; CHECK-NEXT:    and w8, w0, #0xffff
+; CHECK-NEXT:    movk w9, #43690, lsl #16
+; CHECK-NEXT:    umull x9, w8, w9
+; CHECK-NEXT:    lsr x9, x9, #33
+; CHECK-NEXT:    add w9, w9, w9, lsl #1
+; CHECK-NEXT:    sub w8, w8, w9
+; CHECK-NEXT:    cmp w8, #2 // =2
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i16 %X, 3
+  %cmp = icmp eq i16 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t8_3_2(i8 %X) nounwind {
+; CHECK-LABEL: t8_3_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w9, #43691
+; CHECK-NEXT:    and w8, w0, #0xff
+; CHECK-NEXT:    movk w9, #43690, lsl #16
+; CHECK-NEXT:    umull x9, w8, w9
+; CHECK-NEXT:    lsr x9, x9, #33
+; CHECK-NEXT:    add w9, w9, w9, lsl #1
+; CHECK-NEXT:    sub w8, w8, w9
+; CHECK-NEXT:    cmp w8, #2 // =2
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i8 %X, 3
+  %cmp = icmp eq i8 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t64_3_2(i64 %X) nounwind {
+; CHECK-LABEL: t64_3_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov x8, #-6148914691236517206
+; CHECK-NEXT:    movk x8, #43691
+; CHECK-NEXT:    umulh x8, x0, x8
+; CHECK-NEXT:    lsr x8, x8, #1
+; CHECK-NEXT:    add x8, x8, x8, lsl #1
+; CHECK-NEXT:    sub x8, x0, x8
+; CHECK-NEXT:    cmp x8, #2 // =2
+; CHECK-NEXT:    cset w0, eq
+; CHECK-NEXT:    ret
+  %urem = urem i64 %X, 3
+  %cmp = icmp eq i64 %urem, 2
+  ret i1 %cmp
+}

diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
new file mode 100644
index 000000000000..f45b5598eae4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    adrp x9, .LCPI0_0
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI0_0]
+; CHECK-NEXT:    umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #1
+; CHECK-NEXT:    movi v3.4s, #3
+; CHECK-NEXT:    mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
+  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_5:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    movk w8, #52428, lsl #16
+; CHECK-NEXT:    adrp x9, .LCPI1_0
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI1_0]
+; CHECK-NEXT:    umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-NEXT:    movi v3.4s, #5
+; CHECK-NEXT:    mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
+  %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_6_part0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    adrp x9, .LCPI2_0
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI2_0]
+; CHECK-NEXT:    umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-NEXT:    movi v3.4s, #6
+; CHECK-NEXT:    mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_6_part1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    movk w8, #43690, lsl #16
+; CHECK-NEXT:    adrp x9, .LCPI3_0
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    ldr q2, [x9, :lo12:.LCPI3_0]
+; CHECK-NEXT:    umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:    ushr v1.4s, v1.4s, #2
+; CHECK-NEXT:    movi v3.4s, #6
+; CHECK-NEXT:    mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+  %cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_tautological:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, .LCPI4_0
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT:    adrp x8, .LCPI4_1
+; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_1]
+; CHECK-NEXT:    adrp x8, .LCPI4_2
+; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI4_2]
+; CHECK-NEXT:    adrp x8, .LCPI4_3
+; CHECK-NEXT:    ldr q4, [x8, :lo12:.LCPI4_3]
+; CHECK-NEXT:    adrp x8, .LCPI4_4
+; CHECK-NEXT:    umull2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT:    umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT:    neg v2.4s, v2.4s
+; CHECK-NEXT:    uzp2 v1.4s, v1.4s, v5.4s
+; CHECK-NEXT:    ldr q5, [x8, :lo12:.LCPI4_4]
+; CHECK-NEXT:    ushl v1.4s, v1.4s, v2.4s
+; CHECK-NEXT:    bsl v3.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mls v0.4s, v3.4s, v4.4s
+; CHECK-NEXT:    cmeq v0.4s, v0.4s, v5.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3>
+  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x i1> %cmp
+}

diff --git a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
new file mode 100644
index 000000000000..f8a7d7ba5194
--- /dev/null
+++ b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
+
+define i1 @t32_3_1(i32 %X) nounwind {
+; X86-LABEL: t32_3_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $1, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_3_1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $33, %rcx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $1, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 3
+  %cmp = icmp eq i32 %urem, 1
+  ret i1 %cmp
+}
+
+define i1 @t32_3_2(i32 %X) nounwind {
+; X86-LABEL: t32_3_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $2, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_3_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $33, %rcx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $2, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 3
+  %cmp = icmp eq i32 %urem, 2
+  ret i1 %cmp
+}
+
+
+define i1 @t32_5_1(i32 %X) nounwind {
+; X86-LABEL: t32_5_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    leal (%edx,%edx,4), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $1, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_5_1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    leal (%rcx,%rcx,4), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $1, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 1
+  ret i1 %cmp
+}
+
+define i1 @t32_5_2(i32 %X) nounwind {
+; X86-LABEL: t32_5_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    leal (%edx,%edx,4), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $2, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_5_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    leal (%rcx,%rcx,4), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $2, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t32_5_3(i32 %X) nounwind {
+; X86-LABEL: t32_5_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    leal (%edx,%edx,4), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $3, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_5_3:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    leal (%rcx,%rcx,4), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $3, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 3
+  ret i1 %cmp
+}
+
+define i1 @t32_5_4(i32 %X) nounwind {
+; X86-LABEL: t32_5_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl $2, %edx
+; X86-NEXT:    leal (%edx,%edx,4), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $4, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_5_4:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    leal (%rcx,%rcx,4), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $4, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 5
+  %cmp = icmp eq i32 %urem, 4
+  ret i1 %cmp
+}
+
+
+define i1 @t32_6_1(i32 %X) nounwind {
+; X86-LABEL: t32_6_1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    andl $-2, %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $1, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_6_1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $1, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 1
+  ret i1 %cmp
+}
+
+define i1 @t32_6_2(i32 %X) nounwind {
+; X86-LABEL: t32_6_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    andl $-2, %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $2, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_6_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $2, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t32_6_3(i32 %X) nounwind {
+; X86-LABEL: t32_6_3:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    andl $-2, %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $3, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_6_3:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $3, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 3
+  ret i1 %cmp
+}
+
+define i1 @t32_6_4(i32 %X) nounwind {
+; X86-LABEL: t32_6_4:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    andl $-2, %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $4, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_6_4:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $4, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 4
+  ret i1 %cmp
+}
+
+define i1 @t32_6_5(i32 %X) nounwind {
+; X86-LABEL: t32_6_5:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    mull %edx
+; X86-NEXT:    shrl %edx
+; X86-NEXT:    andl $-2, %edx
+; X86-NEXT:    leal (%edx,%edx,2), %eax
+; X86-NEXT:    subl %eax, %ecx
+; X86-NEXT:    cmpl $5, %ecx
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t32_6_5:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT:    imulq %rax, %rcx
+; X64-NEXT:    shrq $34, %rcx
+; X64-NEXT:    addl %ecx, %ecx
+; X64-NEXT:    leal (%rcx,%rcx,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpl $5, %edi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i32 %X, 6
+  %cmp = icmp eq i32 %urem, 5
+  ret i1 %cmp
+}
+
+;-------------------------------------------------------------------------------
+; Other widths.
+
+define i1 @t16_3_2(i16 %X) nounwind {
+; X86-LABEL: t16_3_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    imull $43691, %eax, %ecx # imm = 0xAAAB
+; X86-NEXT:    shrl $17, %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    cmpw $2, %ax
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t16_3_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl %di, %eax
+; X64-NEXT:    imull $43691, %eax, %eax # imm = 0xAAAB
+; X64-NEXT:    shrl $17, %eax
+; X64-NEXT:    leal (%rax,%rax,2), %eax
+; X64-NEXT:    subl %eax, %edi
+; X64-NEXT:    cmpw $2, %di
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i16 %X, 3
+  %cmp = icmp eq i16 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t8_3_2(i8 %X) nounwind {
+; X86-LABEL: t8_3_2:
+; X86:       # %bb.0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    imull $171, %eax, %ecx
+; X86-NEXT:    shrl $9, %ecx
+; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
+; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    cmpb $2, %al
+; X86-NEXT:    sete %al
+; X86-NEXT:    retl
+;
+; X64-LABEL: t8_3_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    imull $171, %eax, %ecx
+; X64-NEXT:    shrl $9, %ecx
+; X64-NEXT:    leal (%rcx,%rcx,2), %ecx
+; X64-NEXT:    subb %cl, %al
+; X64-NEXT:    cmpb $2, %al
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i8 %X, 3
+  %cmp = icmp eq i8 %urem, 2
+  ret i1 %cmp
+}
+
+define i1 @t64_3_2(i64 %X) nounwind {
+; X86-LABEL: t64_3_2:
+; X86:       # %bb.0:
+; X86-NEXT:    subl $12, %esp
+; X86-NEXT:    pushl $0
+; X86-NEXT:    pushl $3
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    pushl {{[0-9]+}}(%esp)
+; X86-NEXT:    calll __umoddi3
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    xorl $2, %eax
+; X86-NEXT:    orl %edx, %eax
+; X86-NEXT:    sete %al
+; X86-NEXT:    addl $12, %esp
+; X86-NEXT:    retl
+;
+; X64-LABEL: t64_3_2:
+; X64:       # %bb.0:
+; X64-NEXT:    movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    mulq %rcx
+; X64-NEXT:    shrq %rdx
+; X64-NEXT:    leaq (%rdx,%rdx,2), %rax
+; X64-NEXT:    subq %rax, %rdi
+; X64-NEXT:    cmpq $2, %rdi
+; X64-NEXT:    sete %al
+; X64-NEXT:    retq
+  %urem = urem i64 %X, 3
+  %cmp = icmp eq i64 %urem, 2
+  ret i1 %cmp
+}

diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
new file mode 100644
index 000000000000..b580e39c7289
--- /dev/null
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE41
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL
+
+define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_3:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psrld $1, %xmm2
+; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    retq
+;
+; CHECK-SSE41-LABEL: t32_3:
+; CHECK-SSE41:       # %bb.0:
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT:    psrld $1, %xmm2
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: t32_3:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT:    vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: t32_3:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT:    vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512VL-LABEL: t32_3:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT:    vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    retq
+  %urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
+  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_5:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psrld $2, %xmm2
+; CHECK-SSE2-NEXT:    movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    pslld $2, %xmm1
+; CHECK-SSE2-NEXT:    paddd %xmm2, %xmm1
+; CHECK-SSE2-NEXT:    psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    retq
+;
+; CHECK-SSE41-LABEL: t32_5:
+; CHECK-SSE41:       # %bb.0:
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT:    psrld $2, %xmm2
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: t32_5:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: t32_5:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
+; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512VL-LABEL: t32_5:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    retq
+  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
+  %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_6_part0:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psrld $2, %xmm2
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    retq
+;
+; CHECK-SSE41-LABEL: t32_6_part0:
+; CHECK-SSE41:       # %bb.0:
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT:    psrld $2, %xmm2
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: t32_6_part0:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: t32_6_part0:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
+; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512VL-LABEL: t32_6_part0:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    retq
+  %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_6_part1:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psrld $2, %xmm2
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [6,6,6,6]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    retq
+;
+; CHECK-SSE41-LABEL: t32_6_part1:
+; CHECK-SSE41:       # %bb.0:
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT:    psrld $2, %xmm2
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: t32_6_part1:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: t32_6_part1:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
+; CHECK-AVX2-NEXT:    vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512VL-LABEL: t32_6_part1:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT:    vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    retq
+  %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+  %cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_tautological:
+; CHECK-SSE2:       # %bb.0:
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,1,2,2]
+; CHECK-SSE2-NEXT:    psrld $1, %xmm2
+; CHECK-SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; CHECK-SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [1,1,2,3]
+; CHECK-SSE2-NEXT:    movapd %xmm1, %xmm3
+; CHECK-SSE2-NEXT:    pmuludq %xmm2, %xmm3
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT:    pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; CHECK-SSE2-NEXT:    psubd %xmm3, %xmm0
+; CHECK-SSE2-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT:    retq
+;
+; CHECK-SSE41-LABEL: t32_tautological:
+; CHECK-SSE41:       # %bb.0:
+; CHECK-SSE41-NEXT:    movdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT:    pmuludq %xmm2, %xmm3
+; CHECK-SSE41-NEXT:    pmuludq %xmm0, %xmm1
+; CHECK-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-SSE41-NEXT:    psrld $1, %xmm2
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
+; CHECK-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; CHECK-SSE41-NEXT:    pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT:    psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT:    pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT:    retq
+;
+; CHECK-AVX1-LABEL: t32_tautological:
+; CHECK-AVX1:       # %bb.0:
+; CHECK-AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
+; CHECK-AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT:    vpsrld $1, %xmm1, %xmm2
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-AVX1-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT:    retq
+;
+; CHECK-AVX2-LABEL: t32_tautological:
+; CHECK-AVX2:       # %bb.0:
+; CHECK-AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
+; CHECK-AVX2-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
+; CHECK-AVX2-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT:    retq
+;
+; CHECK-AVX512VL-LABEL: t32_tautological:
+; CHECK-AVX512VL:       # %bb.0:
+; CHECK-AVX512VL-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm2, %xmm3, %xmm2
+; CHECK-AVX512VL-NEXT:    vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX512VL-NEXT:    vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
+; CHECK-AVX512VL-NEXT:    vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT:    retq
+  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3>
+  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+  ret <4 x i1> %cmp
+}


        


More information about the llvm-commits mailing list