[llvm] 7874db7 - [NFC][Codegen] Add `x u% C1 == C2` with C1 u> C2 non-tautological tests
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 10 08:46:20 PST 2019
Author: Roman Lebedev
Date: 2019-11-10T19:44:46+03:00
New Revision: 7874db75ef41f5c387f83e1d0bd16607721ddf89
URL: https://github.com/llvm/llvm-project/commit/7874db75ef41f5c387f83e1d0bd16607721ddf89
DIFF: https://github.com/llvm/llvm-project/commit/7874db75ef41f5c387f83e1d0bd16607721ddf89.diff
LOG: [NFC][Codegen] Add `x u% C1 == C2` with C1 u> C2 non-tautological tests
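These are baseline tests for `x u% C1 == C2` where the compared constant C2 is non-zero but still u< C1, so the compare is not tautologically false. At the moment the remainder-by-constant is expanded the usual way: a fixed-point reciprocal multiply, a shift, a multiply-back, a subtract, and then the compare. As an illustration only (not part of the patch), a minimal C model of that expansion for the scalar i32 `% 3` cases, using the constant 0xAAAAAAAB and shift 33 that appear in the CHECK lines below; the function name is made up for the sketch:

    #include <assert.h>
    #include <stdint.h>

    /* Sketch of the current lowering of `x u% 3 == 2` on i32:
       no division instruction, just a reciprocal multiply. */
    static int urem3_is_2(uint32_t x) {
        uint64_t q  = ((uint64_t)x * 0xAAAAAAABu) >> 33; /* q == x / 3 for every 32-bit x */
        uint32_t rem = x - (uint32_t)(3 * q);            /* rem == x % 3 */
        return rem == 2;                                 /* the non-tautological compare */
    }

    int main(void) {
        for (uint32_t x = 0; x < 1000000; ++x)
            assert(urem3_is_2(x) == (x % 3u == 2));
        return 0;
    }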
Added:
llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
new file mode 100644
index 000000000000..6fe10ce5d5b3
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-nonzero.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define i1 @t32_3_1(i32 %X) nounwind {
+; CHECK-LABEL: t32_3_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #33
+; CHECK-NEXT: add w8, w8, w8, lsl #1
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: cmp w8, #1 // =1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 3
+ %cmp = icmp eq i32 %urem, 1
+ ret i1 %cmp
+}
+
+define i1 @t32_3_2(i32 %X) nounwind {
+; CHECK-LABEL: t32_3_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #33
+; CHECK-NEXT: add w8, w8, w8, lsl #1
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: cmp w8, #2 // =2
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 3
+ %cmp = icmp eq i32 %urem, 2
+ ret i1 %cmp
+}
+
+
+define i1 @t32_5_1(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #52429
+; CHECK-NEXT: movk w8, #52428, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: add w8, w8, w8, lsl #2
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: cmp w8, #1 // =1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 1
+ ret i1 %cmp
+}
+
+define i1 @t32_5_2(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #52429
+; CHECK-NEXT: movk w8, #52428, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: add w8, w8, w8, lsl #2
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: cmp w8, #2 // =2
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t32_5_3(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #52429
+; CHECK-NEXT: movk w8, #52428, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: add w8, w8, w8, lsl #2
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: cmp w8, #3 // =3
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 3
+ ret i1 %cmp
+}
+
+define i1 @t32_5_4(i32 %X) nounwind {
+; CHECK-LABEL: t32_5_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #52429
+; CHECK-NEXT: movk w8, #52428, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: add w8, w8, w8, lsl #2
+; CHECK-NEXT: sub w8, w0, w8
+; CHECK-NEXT: cmp w8, #4 // =4
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 4
+ ret i1 %cmp
+}
+
+
+define i1 @t32_6_1(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: mov w9, #6
+; CHECK-NEXT: msub w8, w8, w9, w0
+; CHECK-NEXT: cmp w8, #1 // =1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 1
+ ret i1 %cmp
+}
+
+define i1 @t32_6_2(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: mov w9, #6
+; CHECK-NEXT: msub w8, w8, w9, w0
+; CHECK-NEXT: cmp w8, #2 // =2
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t32_6_3(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: mov w9, #6
+; CHECK-NEXT: msub w8, w8, w9, w0
+; CHECK-NEXT: cmp w8, #3 // =3
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 3
+ ret i1 %cmp
+}
+
+define i1 @t32_6_4(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: mov w9, #6
+; CHECK-NEXT: msub w8, w8, w9, w0
+; CHECK-NEXT: cmp w8, #4 // =4
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 4
+ ret i1 %cmp
+}
+
+define i1 @t32_6_5(i32 %X) nounwind {
+; CHECK-LABEL: t32_6_5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: umull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #34
+; CHECK-NEXT: mov w9, #6
+; CHECK-NEXT: msub w8, w8, w9, w0
+; CHECK-NEXT: cmp w8, #5 // =5
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 5
+ ret i1 %cmp
+}
+
+;-------------------------------------------------------------------------------
+; Other widths.
+
+define i1 @t16_3_2(i16 %X) nounwind {
+; CHECK-LABEL: t16_3_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w9, #43691
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: movk w9, #43690, lsl #16
+; CHECK-NEXT: umull x9, w8, w9
+; CHECK-NEXT: lsr x9, x9, #33
+; CHECK-NEXT: add w9, w9, w9, lsl #1
+; CHECK-NEXT: sub w8, w8, w9
+; CHECK-NEXT: cmp w8, #2 // =2
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i16 %X, 3
+ %cmp = icmp eq i16 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t8_3_2(i8 %X) nounwind {
+; CHECK-LABEL: t8_3_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w9, #43691
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: movk w9, #43690, lsl #16
+; CHECK-NEXT: umull x9, w8, w9
+; CHECK-NEXT: lsr x9, x9, #33
+; CHECK-NEXT: add w9, w9, w9, lsl #1
+; CHECK-NEXT: sub w8, w8, w9
+; CHECK-NEXT: cmp w8, #2 // =2
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i8 %X, 3
+ %cmp = icmp eq i8 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t64_3_2(i64 %X) nounwind {
+; CHECK-LABEL: t64_3_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-6148914691236517206
+; CHECK-NEXT: movk x8, #43691
+; CHECK-NEXT: umulh x8, x0, x8
+; CHECK-NEXT: lsr x8, x8, #1
+; CHECK-NEXT: add x8, x8, x8, lsl #1
+; CHECK-NEXT: sub x8, x0, x8
+; CHECK-NEXT: cmp x8, #2 // =2
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %urem = urem i64 %X, 3
+ %cmp = icmp eq i64 %urem, 2
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
new file mode 100644
index 000000000000..f45b5598eae4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: adrp x9, .LCPI0_0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_0]
+; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #1
+; CHECK-NEXT: movi v3.4s, #3
+; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #52429
+; CHECK-NEXT: movk w8, #52428, lsl #16
+; CHECK-NEXT: adrp x9, .LCPI1_0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_0]
+; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #2
+; CHECK-NEXT: movi v3.4s, #5
+; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
+ %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_6_part0:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: adrp x9, .LCPI2_0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_0]
+; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #2
+; CHECK-NEXT: movi v3.4s, #6
+; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_6_part1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #43691
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: adrp x9, .LCPI3_0
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_0]
+; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #2
+; CHECK-NEXT: movi v3.4s, #6
+; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+ %cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
+; CHECK-LABEL: t32_tautological:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
+; CHECK-NEXT: adrp x8, .LCPI4_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_1]
+; CHECK-NEXT: adrp x8, .LCPI4_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI4_2]
+; CHECK-NEXT: adrp x8, .LCPI4_3
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI4_3]
+; CHECK-NEXT: adrp x8, .LCPI4_4
+; CHECK-NEXT: umull2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: neg v2.4s, v2.4s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v5.4s
+; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI4_4]
+; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
+; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, v5.4s
+; CHECK-NEXT: xtn v0.4h, v0.4s
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+ ret <4 x i1> %cmp
+}
diff --git a/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
new file mode 100644
index 000000000000..f8a7d7ba5194
--- /dev/null
+++ b/llvm/test/CodeGen/X86/urem-seteq-nonzero.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
+
+define i1 @t32_3_1(i32 %X) nounwind {
+; X86-LABEL: t32_3_1:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_3_1:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $33, %rcx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $1, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 3
+ %cmp = icmp eq i32 %urem, 1
+ ret i1 %cmp
+}
+
+define i1 @t32_3_2(i32 %X) nounwind {
+; X86-LABEL: t32_3_2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $2, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_3_2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $33, %rcx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $2, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 3
+ %cmp = icmp eq i32 %urem, 2
+ ret i1 %cmp
+}
+
+
+define i1 @t32_5_1(i32 %X) nounwind {
+; X86-LABEL: t32_5_1:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl $2, %edx
+; X86-NEXT: leal (%edx,%edx,4), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_5_1:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: leal (%rcx,%rcx,4), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $1, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 1
+ ret i1 %cmp
+}
+
+define i1 @t32_5_2(i32 %X) nounwind {
+; X86-LABEL: t32_5_2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl $2, %edx
+; X86-NEXT: leal (%edx,%edx,4), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $2, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_5_2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: leal (%rcx,%rcx,4), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $2, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t32_5_3(i32 %X) nounwind {
+; X86-LABEL: t32_5_3:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl $2, %edx
+; X86-NEXT: leal (%edx,%edx,4), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $3, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_5_3:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: leal (%rcx,%rcx,4), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 3
+ ret i1 %cmp
+}
+
+define i1 @t32_5_4(i32 %X) nounwind {
+; X86-LABEL: t32_5_4:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-858993459, %edx # imm = 0xCCCCCCCD
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl $2, %edx
+; X86-NEXT: leal (%edx,%edx,4), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $4, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_5_4:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $3435973837, %ecx # imm = 0xCCCCCCCD
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: leal (%rcx,%rcx,4), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $4, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 5
+ %cmp = icmp eq i32 %urem, 4
+ ret i1 %cmp
+}
+
+
+define i1 @t32_6_1(i32 %X) nounwind {
+; X86-LABEL: t32_6_1:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $-2, %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_6_1:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $1, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 1
+ ret i1 %cmp
+}
+
+define i1 @t32_6_2(i32 %X) nounwind {
+; X86-LABEL: t32_6_2:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $-2, %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $2, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_6_2:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $2, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t32_6_3(i32 %X) nounwind {
+; X86-LABEL: t32_6_3:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $-2, %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $3, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_6_3:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $3, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 3
+ ret i1 %cmp
+}
+
+define i1 @t32_6_4(i32 %X) nounwind {
+; X86-LABEL: t32_6_4:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $-2, %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $4, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_6_4:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $4, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 4
+ ret i1 %cmp
+}
+
+define i1 @t32_6_5(i32 %X) nounwind {
+; X86-LABEL: t32_6_5:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1431655765, %edx # imm = 0xAAAAAAAB
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: andl $-2, %edx
+; X86-NEXT: leal (%edx,%edx,2), %eax
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: cmpl $5, %ecx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t32_6_5:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl $2863311531, %ecx # imm = 0xAAAAAAAB
+; X64-NEXT: imulq %rax, %rcx
+; X64-NEXT: shrq $34, %rcx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: leal (%rcx,%rcx,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpl $5, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 6
+ %cmp = icmp eq i32 %urem, 5
+ ret i1 %cmp
+}
+
+;-------------------------------------------------------------------------------
+; Other widths.
+
+define i1 @t16_3_2(i16 %X) nounwind {
+; X86-LABEL: t16_3_2:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull $43691, %eax, %ecx # imm = 0xAAAB
+; X86-NEXT: shrl $17, %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: cmpw $2, %ax
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t16_3_2:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: imull $43691, %eax, %eax # imm = 0xAAAB
+; X64-NEXT: shrl $17, %eax
+; X64-NEXT: leal (%rax,%rax,2), %eax
+; X64-NEXT: subl %eax, %edi
+; X64-NEXT: cmpw $2, %di
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i16 %X, 3
+ %cmp = icmp eq i16 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t8_3_2(i8 %X) nounwind {
+; X86-LABEL: t8_3_2:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: imull $171, %eax, %ecx
+; X86-NEXT: shrl $9, %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %ecx
+; X86-NEXT: subb %cl, %al
+; X86-NEXT: cmpb $2, %al
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: t8_3_2:
+; X64: # %bb.0:
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: imull $171, %eax, %ecx
+; X64-NEXT: shrl $9, %ecx
+; X64-NEXT: leal (%rcx,%rcx,2), %ecx
+; X64-NEXT: subb %cl, %al
+; X64-NEXT: cmpb $2, %al
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i8 %X, 3
+ %cmp = icmp eq i8 %urem, 2
+ ret i1 %cmp
+}
+
+define i1 @t64_3_2(i64 %X) nounwind {
+; X86-LABEL: t64_3_2:
+; X86: # %bb.0:
+; X86-NEXT: subl $12, %esp
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $3
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: calll __umoddi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: xorl $2, %eax
+; X86-NEXT: orl %edx, %eax
+; X86-NEXT: sete %al
+; X86-NEXT: addl $12, %esp
+; X86-NEXT: retl
+;
+; X64-LABEL: t64_3_2:
+; X64: # %bb.0:
+; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: mulq %rcx
+; X64-NEXT: shrq %rdx
+; X64-NEXT: leaq (%rdx,%rdx,2), %rax
+; X64-NEXT: subq %rax, %rdi
+; X64-NEXT: cmpq $2, %rdi
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %urem = urem i64 %X, 3
+ %cmp = icmp eq i64 %urem, 2
+ ret i1 %cmp
+}
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
new file mode 100644
index 000000000000..b580e39c7289
--- /dev/null
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-nonzero.ll
@@ -0,0 +1,434 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE41
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL
+
+define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_3:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psrld $1, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: t32_3:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: psrld $1, %xmm2
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: t32_3:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: t32_3:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3,3,3,3]
+; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: t32_3:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 3, i32 3, i32 3, i32 3>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_5(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_5:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: pslld $2, %xmm1
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: t32_5:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: psrld $2, %xmm2
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: t32_5:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: t32_5:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
+; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: t32_5:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
+ %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 2, i32 3, i32 4>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_6_part0:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: t32_6_part0:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: psrld $2, %xmm2
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: t32_6_part0:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: t32_6_part0:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
+; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: t32_6_part0:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_6_part1:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,6,6,6]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: t32_6_part1:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: psrld $2, %xmm2
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: t32_6_part1:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: t32_6_part1:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [6,6,6,6]
+; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: t32_6_part1:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2863311531,2863311531,2863311531,2863311531]
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
+ %cmp = icmp eq <4 x i32> %urem, <i32 4, i32 5, i32 0, i32 0>
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @t32_tautological(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: t32_tautological:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,1,2,2]
+; CHECK-SSE2-NEXT: psrld $1, %xmm2
+; CHECK-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
+; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,2,3]
+; CHECK-SSE2-NEXT: movapd %xmm1, %xmm3
+; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: t32_tautological:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE41-NEXT: psrld $1, %xmm2
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1,2,3,4,5],xmm2[6,7]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0,1,2,3],xmm2[4,5,6,7]
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: t32_tautological:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
+; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: t32_tautological:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
+; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: t32_tautological:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,2147483648,2863311531]
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
+; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3]
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 2, i32 3>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 1, i32 2, i32 2>
+ ret <4 x i1> %cmp
+}