[llvm] 0c2d28a - [X86] Add tests for transform `(icmp eq/ne (and X, C0), (shift X, C1))`; NFC
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 17 23:17:07 PDT 2023
Author: Noah Goldstein
Date: 2023-10-18T01:16:55-05:00
New Revision: 0c2d28a448dee14f52c4600368da0345048588bb
URL: https://github.com/llvm/llvm-project/commit/0c2d28a448dee14f52c4600368da0345048588bb
DIFF: https://github.com/llvm/llvm-project/commit/0c2d28a448dee14f52c4600368da0345048588bb.diff
LOG: [X86] Add tests for transform `(icmp eq/ne (and X, C0), (shift X, C1))`; NFC
Differential Revision: https://reviews.llvm.org/D152115
Added:
llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
new file mode 100644
index 000000000000000..8ec142acb71d4ce
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmp-shiftX-maskX.ll
@@ -0,0 +1,575 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,CHECK-NOBMI,CHECK-NOBMI-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-BMI2-SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx2 | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2,+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK,CHECK-BMI2,CHECK-AVX,CHECK-AVX512
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) ; funnel-shift intrinsic declarations; of these, only the i64 and i8 fshl variants are called by the functions in this file
+declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <16 x i16> @llvm.fshl.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
+declare <16 x i16> @llvm.fshr.v16i16(<16 x i16>, <16 x i16>, <16 x i16>)
+declare i64 @llvm.fshl.i64(i64, i64, i64) ; called by @rorl_to_shl_eq_i64_s16
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i16 @llvm.fshl.i16(i16, i16, i16)
+declare i16 @llvm.fshr.i16(i16, i16, i16)
+declare i8 @llvm.fshl.i8(i8, i8, i8) ; called by @rorl_to_srl_ne_i8_s5_fail
+declare i8 @llvm.fshr.i8(i8, i8, i8)
+
+define i1 @shr_to_shl_eq_i8_s2(i8 %x) { ; i8: returns (x & 0x3F) == (x >> 2) using a logical right shift
+; CHECK-LABEL: shr_to_shl_eq_i8_s2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andb $63, %al
+; CHECK-NEXT: shrb $2, %dil
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i8 %x, 63 ; 63 = 0x3F = 0xFF >> 2: an all-ones i8 shifted right by the same amount as %shr
+ %shr = lshr i8 %x, 2
+ %r = icmp eq i8 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i8_s7(i8 %x) { ; i8: returns (x << 7) != (x & 0x80)
+; CHECK-LABEL: shl_to_shr_ne_i8_s7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shlb $7, %al
+; CHECK-NEXT: andb $-128, %dil
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %shl = shl i8 %x, 7
+ %and = and i8 %x, 128 ; 128 = 0x80 = (0xFF << 7) truncated to 8 bits; shows up as $-128 in the expected assembly
+ %r = icmp ne i8 %shl, %and
+ ret i1 %r
+}
+
+define i1 @rorl_to_srl_ne_i8_s5_fail(i8 %x) { ; i8: returns rotate(x, 5) != x; no mask is involved
+; CHECK-LABEL: rorl_to_srl_ne_i8_s5_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: rolb $5, %al
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %ror = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 5) ; fshl with both data operands equal to %x is a rotate-left by 5 (rolb $5 above), despite the %ror name
+ %r = icmp ne i8 %ror, %x
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i8_s1(i8 %x) { ; i8: returns (x & 0x7F) == (x >> 1) using a logical right shift
+; CHECK-LABEL: shr_to_shl_eq_i8_s1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andb $127, %al
+; CHECK-NEXT: shrb %dil
+; CHECK-NEXT: cmpb %dil, %al
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i8 %x, 127 ; 127 = 0x7F = 0xFF >> 1, matching the shift amount of %shr
+ %shr = lshr i8 %x, 1
+ %r = icmp eq i8 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i32_s3(i32 %x) { ; i32: returns (x & 0x1FFFFFFF) == (x >> 3) using a logical right shift
+; CHECK-LABEL: shr_to_shl_eq_i32_s3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
+; CHECK-NEXT: shrl $3, %edi
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i32 %x, 536870911 ; 0x1FFFFFFF = 0xFFFFFFFF >> 3, matching the shift amount of %shr
+ %shr = lshr i32 %x, 3
+ %r = icmp eq i32 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i32_s3_fail(i32 %x) { ; i32: returns (x & 0x1FFFFFFF) == (x << 3)
+; CHECK-LABEL: shl_to_shr_eq_i32_s3_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $536870911, %eax # imm = 0x1FFFFFFF
+; CHECK-NEXT: shll $3, %edi
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i32 %x, 536870911 ; 0x1FFFFFFF is 0xFFFFFFFF >> 3, whereas the shift below is a left shift (0xFFFFFFFF << 3 would be 0xFFFFFFF8); presumably this mismatch is the "_fail" condition
+ %shr = shl i32 %x, 3 ; note: the value is named %shr but is produced by shl
+ %r = icmp eq i32 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i32_s16(i32 %x) { ; i32: returns (x << 16) != (x & 0xFFFF0000)
+; CHECK-LABEL: shl_to_shr_ne_i32_s16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: andl $-65536, %edi # imm = 0xFFFF0000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %shl = shl i32 %x, 16
+ %and = and i32 %x, 4294901760 ; 0xFFFF0000 = 0xFFFFFFFF << 16, matching the shift amount of %shl
+ %r = icmp ne i32 %shl, %and
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i32_s16_fail(i32 %x) { ; i32: returns (x << 16) != (x & 0x7FFF8000)
+; CHECK-LABEL: shl_to_shr_ne_i32_s16_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: andl $2147450880, %edi # imm = 0x7FFF8000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %shl = shl i32 %x, 16
+ %and = and i32 %x, 2147450880 ; 0x7FFF8000 differs from 0xFFFFFFFF << 16 (0xFFFF0000); presumably this mismatch is the "_fail" condition
+ %r = icmp ne i32 %shl, %and
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i16_s1(i16 %x) { ; i16: returns (x & 0x7FFF) == (x >> 1) using a logical right shift
+; CHECK-LABEL: shr_to_shl_eq_i16_s1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: andl $32767, %edi # imm = 0x7FFF
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: cmpw %ax, %di
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i16 %x, 32767 ; 0x7FFF = 0xFFFF >> 1, matching the shift amount of %shr
+ %shr = lshr i16 %x, 1
+ %r = icmp eq i16 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i16_s1_fail(i16 %x) { ; i16: returns (x & 0x7FFE) == (x >> 1) using a logical right shift
+; CHECK-LABEL: shr_to_shl_eq_i16_s1_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: andl $32766, %edi # imm = 0x7FFE
+; CHECK-NEXT: shrl %eax
+; CHECK-NEXT: cmpw %ax, %di
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %and = and i16 %x, 32766 ; 0x7FFE also clears bit 0, so it is not 0xFFFF >> 1 (0x7FFF); presumably this mismatch is the "_fail" condition
+ %shr = lshr i16 %x, 1
+ %r = icmp eq i16 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i64_s44(i64 %x) { ; i64: returns (x << 44) == (x & 0xFFFFF00000000000)
+; CHECK-LABEL: shl_to_shr_eq_i64_s44:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-17592186044416, %rax # imm = 0xFFFFF00000000000
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: shlq $44, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %shl = shl i64 %x, 44
+ %and = and i64 %x, 18446726481523507200 ; 0xFFFFF00000000000 = all-ones i64 << 44, matching the shift amount of %shl
+ %r = icmp eq i64 %shl, %and
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_ne_i64_s32(i64 %x) { ; i64: returns (x & 0xFFFFFFFF) != (x >> 32) using a logical right shift
+; CHECK-LABEL: shr_to_shl_ne_i64_s32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrq $32, %rdi
+; CHECK-NEXT: cmpq %rdi, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i64 %x, 4294967295 ; 0xFFFFFFFF = all-ones i64 >> 32; the expected assembly performs this mask with a 32-bit movl, which zero-extends
+ %shr = lshr i64 %x, 32
+ %r = icmp ne i64 %and, %shr
+ ret i1 %r
+}
+
+define i1 @rorl_to_shl_eq_i64_s16(i64 %x) { ; i64: returns rotate(x, 16) == x; no mask is involved
+; CHECK-NOBMI-LABEL: rorl_to_shl_eq_i64_s16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movq %rdi, %rax
+; CHECK-NOBMI-NEXT: rolq $16, %rax
+; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT: sete %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: rorl_to_shl_eq_i64_s16:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: rorxq $48, %rdi, %rax
+; CHECK-BMI2-NEXT: cmpq %rdi, %rax
+; CHECK-BMI2-NEXT: sete %al
+; CHECK-BMI2-NEXT: retq
+ %ror = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 16) ; fshl with both data operands equal to %x is a rotate-left by 16: rolq $16 without BMI2, rorxq $48 (= 64 - 16) with BMI2
+ %r = icmp eq i64 %ror, %x
+ ret i1 %r
+}
+
+define i1 @ashr_to_shl_ne_i64_s32_fail(i64 %x) { ; i64: returns (x & 0xFFFFFFFF) != (x >>s 32) using an arithmetic right shift
+; CHECK-LABEL: ashr_to_shl_ne_i64_s32_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: sarq $32, %rdi
+; CHECK-NEXT: cmpq %rdi, %rax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i64 %x, 4294967295 ; 0xFFFFFFFF = low 32 bits
+ %shr = ashr i64 %x, 32 ; arithmetic (sign-filling) shift rather than lshr; presumably this is the "_fail" condition
+ %r = icmp ne i64 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i64_s63(i64 %x) { ; i64: returns (x << 63) == (x & 0x8000000000000000)
+; CHECK-LABEL: shl_to_shr_eq_i64_s63:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: shlq $63, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %shl = shl i64 %x, 63
+ %and = and i64 %x, 9223372036854775808 ; 0x8000000000000000 = all-ones i64 << 63 (sign bit only)
+ %r = icmp eq i64 %shl, %and
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_eq_i64_s63_fail(i64 %x) { ; i64: returns (x << 63) >u (x & 0x8000000000000000)
+; CHECK-LABEL: shl_to_shr_eq_i64_s63_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000
+; CHECK-NEXT: andq %rdi, %rax
+; CHECK-NEXT: shlq $63, %rdi
+; CHECK-NEXT: cmpq %rax, %rdi
+; CHECK-NEXT: seta %al
+; CHECK-NEXT: retq
+ %shl = shl i64 %x, 63
+ %and = and i64 %x, 9223372036854775808 ; 0x8000000000000000 = all-ones i64 << 63 (sign bit only)
+ %r = icmp ugt i64 %shl, %and ; unsigned greater-than, not eq/ne as the function name suggests; presumably this is the "_fail" condition
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_eq_i64_s7(i64 %x) { ; i64: returns (x & 0x1FFFFFFFFFFFFFF) == (x >> 7) using a logical right shift
+; CHECK-NOBMI-LABEL: shr_to_shl_eq_i64_s7:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movabsq $144115188075855871, %rax # imm = 0x1FFFFFFFFFFFFFF
+; CHECK-NOBMI-NEXT: andq %rdi, %rax
+; CHECK-NOBMI-NEXT: shrq $7, %rdi
+; CHECK-NOBMI-NEXT: cmpq %rdi, %rax
+; CHECK-NOBMI-NEXT: sete %al
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-LABEL: shr_to_shl_eq_i64_s7:
+; CHECK-BMI2: # %bb.0:
+; CHECK-BMI2-NEXT: movb $57, %al
+; CHECK-BMI2-NEXT: bzhiq %rax, %rdi, %rax
+; CHECK-BMI2-NEXT: shrq $7, %rdi
+; CHECK-BMI2-NEXT: cmpq %rdi, %rax
+; CHECK-BMI2-NEXT: sete %al
+; CHECK-BMI2-NEXT: retq
+ %and = and i64 %x, 144115188075855871 ; 0x1FFFFFFFFFFFFFF = 2^57 - 1 = all-ones i64 >> 7; with BMI2 the mask is expected as bzhiq with index 57
+ %shr = lshr i64 %x, 7
+ %r = icmp eq i64 %and, %shr
+ ret i1 %r
+}
+
+define i1 @shl_to_shr_ne_i32_s24(i32 %x) { ; i32: returns (x << 24) != (x & 0xFF000000)
+; CHECK-LABEL: shl_to_shr_ne_i32_s24:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shll $24, %eax
+; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %shl = shl i32 %x, 24
+ %and = and i32 %x, 4278190080 ; 0xFF000000 = 0xFFFFFFFF << 24, matching the shift amount of %shl
+ %r = icmp ne i32 %shl, %and
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_ne_i32_s24_fail(i32 %x) { ; i32: returns (x >> 24) != (x & 0xFF000000) using a logical right shift
+; CHECK-LABEL: shr_to_shl_ne_i32_s24_fail:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: shrl $24, %eax
+; CHECK-NEXT: andl $-16777216, %edi # imm = 0xFF000000
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %shl = lshr i32 %x, 24 ; note: the value is named %shl but is produced by lshr
+ %and = and i32 %x, 4278190080 ; 0xFF000000 is 0xFFFFFFFF << 24, whereas the shift above is a right shift (0xFFFFFFFF >> 24 would be 0xFF); presumably this mismatch is the "_fail" condition
+ %r = icmp ne i32 %shl, %and
+ ret i1 %r
+}
+
+define i1 @shr_to_shl_ne_i32_s8(i32 %x) { ; i32: returns (x & 0xFFFFFF) != (x >> 8) using a logical right shift
+; CHECK-LABEL: shr_to_shl_ne_i32_s8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andl $16777215, %eax # imm = 0xFFFFFF
+; CHECK-NEXT: shrl $8, %edi
+; CHECK-NEXT: cmpl %edi, %eax
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %and = and i32 %x, 16777215 ; 0xFFFFFF = 0xFFFFFFFF >> 8, matching the shift amount of %shr
+ %shr = lshr i32 %x, 8
+ %r = icmp ne i32 %and, %shr
+ ret i1 %r
+}
+
+define <4 x i1> @shr_to_ror_eq_4xi32_s4(<4 x i32> %x) { ; per lane: (x >> 4) != (x & 0x0FFFFFFF); note the predicate is ne although the name says eq
+; CHECK-NOBMI-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: psrld $4, %xmm1
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: psrld $4, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX2-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsrld $4, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [268435455,268435455,268435455,268435455]
+; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsrld $4, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4> ; uniform (splat) logical right shift by 4
+ %and = and <4 x i32> %x, <i32 268435455, i32 268435455, i32 268435455, i32 268435455> ; 268435455 = 0x0FFFFFFF = 0xFFFFFFFF >> 4 in every lane
+ %r = icmp ne <4 x i32> %shr, %and
+ ret <4 x i1> %r
+}
+
+define <4 x i1> @shl_to_ror_eq_4xi32_s8(<4 x i32> %x) { ; per lane: (x << 8) != (x & 0xFFFFFF00); note the predicate is ne although the name says eq
+; CHECK-NOBMI-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: pslld $8, %xmm1
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pslld $8, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX2-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpslld $8, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s8:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpslld $8, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %shr = shl <4 x i32> %x, <i32 8, i32 8, i32 8, i32 8> ; uniform (splat) left shift by 8; note: the value is named %shr but is produced by shl
+ %and = and <4 x i32> %x, <i32 4294967040, i32 4294967040, i32 4294967040, i32 4294967040> ; 4294967040 = 0xFFFFFF00 = 0xFFFFFFFF << 8 in every lane
+ %r = icmp ne <4 x i32> %shr, %and
+ ret <4 x i1> %r
+}
+
+define <4 x i1> @shl_to_ror_eq_4xi32_s7_fail_no_p2(<4 x i32> %x) { ; per lane: (x << 7) != (x & 0xFFFFFF80); note the predicate is ne although the name says eq
+; CHECK-NOBMI-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: pslld $7, %xmm1
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pslld $7, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX2-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpslld $7, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4294967168,4294967168,4294967168,4294967168]
+; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shl_to_ror_eq_4xi32_s7_fail_no_p2:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpslld $7, %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %shr = shl <4 x i32> %x, <i32 7, i32 7, i32 7, i32 7> ; uniform left shift by 7; presumably "no_p2" refers to 7 not being a power of two; note: named %shr but produced by shl
+ %and = and <4 x i32> %x, <i32 4294967168, i32 4294967168, i32 4294967168, i32 4294967168> ; 4294967168 = 0xFFFFFF80 = 0xFFFFFFFF << 7 in every lane
+ %r = icmp ne <4 x i32> %shr, %and
+ ret <4 x i1> %r
+}
+
+define <4 x i1> @shr_to_ror_eq_4xi32_s4_fail_no_splat(<4 x i32> %x) { ; per lane: (x >> {4,4,4,8}) != (x & 0x0FFFFFFF); note the predicate is ne although the name says eq
+; CHECK-NOBMI-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm1
+; CHECK-NOBMI-NEXT: psrld $4, %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOBMI-NEXT: psrld $8, %xmm2
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[2,0]
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,0]
+; CHECK-NOBMI-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-BMI2-SSE2-NEXT: psrld $4, %xmm1
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-BMI2-SSE2-NEXT: psrld $8, %xmm2
+; CHECK-BMI2-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[2,0]
+; CHECK-BMI2-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,0]
+; CHECK-BMI2-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX2-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [268435455,268435455,268435455,268435455]
+; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shr_to_ror_eq_4xi32_s4_fail_no_splat:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: retq
+ %shr = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 8> ; non-uniform shift amounts: the last lane shifts by 8 instead of 4 (the "no_splat" case)
+ %and = and <4 x i32> %x, <i32 268435455, i32 268435455, i32 268435455, i32 268435455> ; 268435455 = 0x0FFFFFFF = 0xFFFFFFFF >> 4 in every lane
+ %r = icmp ne <4 x i32> %shr, %and
+ ret <4 x i1> %r
+}
+
+define <16 x i1> @shl_to_ror_eq_16xi16_s8_fail_preserve_i16(<16 x i16> %x) { ; per i16 lane: (x << 8) != (x & 0xFF00); note the predicate is ne although the name says eq
+; CHECK-NOBMI-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-NOBMI: # %bb.0:
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm2
+; CHECK-NOBMI-NEXT: psllw $8, %xmm2
+; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm3
+; CHECK-NOBMI-NEXT: psllw $8, %xmm3
+; CHECK-NOBMI-NEXT: movdqa {{.*#+}} xmm4 = [0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255]
+; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqw %xmm2, %xmm0
+; CHECK-NOBMI-NEXT: pand %xmm4, %xmm1
+; CHECK-NOBMI-NEXT: pcmpeqw %xmm3, %xmm1
+; CHECK-NOBMI-NEXT: packsswb %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-NOBMI-NEXT: pxor %xmm1, %xmm0
+; CHECK-NOBMI-NEXT: retq
+;
+; CHECK-BMI2-SSE2-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-BMI2-SSE2: # %bb.0:
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-BMI2-SSE2-NEXT: psllw $8, %xmm2
+; CHECK-BMI2-SSE2-NEXT: movdqa %xmm1, %xmm3
+; CHECK-BMI2-SSE2-NEXT: psllw $8, %xmm3
+; CHECK-BMI2-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255]
+; CHECK-BMI2-SSE2-NEXT: pand %xmm4, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqw %xmm2, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pand %xmm4, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pcmpeqw %xmm3, %xmm1
+; CHECK-BMI2-SSE2-NEXT: packsswb %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-BMI2-SSE2-NEXT: pxor %xmm1, %xmm0
+; CHECK-BMI2-SSE2-NEXT: retq
+;
+; CHECK-AVX2-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsllw $8, %ymm0, %ymm1
+; CHECK-AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; CHECK-AVX2-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; CHECK-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; CHECK-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; CHECK-AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vzeroupper
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512-LABEL: shl_to_ror_eq_16xi16_s8_fail_preserve_i16:
+; CHECK-AVX512: # %bb.0:
+; CHECK-AVX512-NEXT: vpsllw $8, %ymm0, %ymm1
+; CHECK-AVX512-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
+; CHECK-AVX512-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; CHECK-AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
+; CHECK-AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; CHECK-AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; CHECK-AVX512-NEXT: vzeroupper
+; CHECK-AVX512-NEXT: retq
+ %shr = shl <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> ; uniform left shift by 8; note: named %shr but produced by shl
+ %and = and <16 x i16> %x, <i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280, i16 65280> ; 65280 = 0xFF00 = 0xFFFF << 8 within i16; the expected SSE2 constant [0,255,...] is this mask as little-endian bytes
+ %r = icmp ne <16 x i16> %shr, %and
+ ret <16 x i1> %r
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-AVX: {{.*}}
+; CHECK-NOBMI-SSE2: {{.*}}
More information about the llvm-commits
mailing list