[llvm] r343392 - [DAGCombiner][NFC] Tests for X div/rem Y single bit fold
David Bolvansky via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 29 14:00:37 PDT 2018
Author: xbolva00
Date: Sat Sep 29 14:00:37 2018
New Revision: 343392
URL: http://llvm.org/viewvc/llvm-project?rev=343392&view=rev
Log:
[DAGCombiner][NFC] Tests for X div/rem Y single bit fold
Modified:
llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
llvm/trunk/test/CodeGen/X86/combine-srem.ll
llvm/trunk/test/CodeGen/X86/combine-udiv.ll
llvm/trunk/test/CodeGen/X86/combine-urem.ll
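These are baseline ([NFC]) tests ahead of a planned DAGCombiner fold: when both
operands of a div/rem are known to be only a single bit wide (the i1 and
<4 x i1> cases below), the operation can in principle be folded away, since the
only well-defined divisor value is true. A minimal sketch of the expected
transform in LLVM IR, assuming that reasoning (the fold itself is not part of
this commit, which only adds the tests):

  ; With single-bit operands, X div Y should fold to X and X rem Y to 0,
  ; for both the signed and unsigned forms:
  %d = udiv i1 %x, %y    ; expected to fold to %x
  %r = urem i1 %x, %y    ; expected to fold to false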
Modified: llvm/trunk/test/CodeGen/X86/combine-sdiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-sdiv.ll?rev=343392&r1=343391&r2=343392&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-sdiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-sdiv.ll Sat Sep 29 14:00:37 2018
@@ -3284,3 +3284,327 @@ define <16 x i8> @pr38658(<16 x i8> %x)
%1 = sdiv <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 7>
ret <16 x i8> %1
}
+
+define i1 @bool_sdiv(i1 %x, i1 %y) {
+; CHECK-LABEL: bool_sdiv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: negb %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: cbtw
+; CHECK-NEXT: andb $1, %sil
+; CHECK-NEXT: negb %sil
+; CHECK-NEXT: idivb %sil
+; CHECK-NEXT: retq
+ %r = sdiv i1 %x, %y
+ ret i1 %r
+}
+
+define <4 x i1> @boolvec_sdiv(<4 x i1> %x, <4 x i1> %y) {
+; SSE2-LABEL: boolvec_sdiv:
+; SSE2: # %bb.0:
+; SSE2-NEXT: pslld $31, %xmm1
+; SSE2-NEXT: psrad $31, %xmm1
+; SSE2-NEXT: pslld $31, %xmm0
+; SSE2-NEXT: psrad $31, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT: movd %xmm2, %ecx
+; SSE2-NEXT: cltd
+; SSE2-NEXT: idivl %ecx
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm3, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm3, %ecx
+; SSE2-NEXT: cltd
+; SSE2-NEXT: idivl %ecx
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movd %xmm1, %ecx
+; SSE2-NEXT: cltd
+; SSE2-NEXT: idivl %ecx
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: cltd
+; SSE2-NEXT: idivl %ecx
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: boolvec_sdiv:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pslld $31, %xmm1
+; SSE41-NEXT: psrad $31, %xmm1
+; SSE41-NEXT: pslld $31, %xmm0
+; SSE41-NEXT: psrad $31, %xmm0
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %ecx
+; SSE41-NEXT: cltd
+; SSE41-NEXT: idivl %ecx
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: movd %xmm1, %esi
+; SSE41-NEXT: cltd
+; SSE41-NEXT: idivl %esi
+; SSE41-NEXT: movd %eax, %xmm2
+; SSE41-NEXT: pinsrd $1, %ecx, %xmm2
+; SSE41-NEXT: pextrd $2, %xmm0, %eax
+; SSE41-NEXT: pextrd $2, %xmm1, %ecx
+; SSE41-NEXT: cltd
+; SSE41-NEXT: idivl %ecx
+; SSE41-NEXT: pinsrd $2, %eax, %xmm2
+; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: pextrd $3, %xmm1, %ecx
+; SSE41-NEXT: cltd
+; SSE41-NEXT: idivl %ecx
+; SSE41-NEXT: pinsrd $3, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: boolvec_sdiv:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX1-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT: cltd
+; AVX1-NEXT: idivl %ecx
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm1, %esi
+; AVX1-NEXT: cltd
+; AVX1-NEXT: idivl %esi
+; AVX1-NEXT: vmovd %eax, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT: cltd
+; AVX1-NEXT: idivl %ecx
+; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %eax
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: cltd
+; AVX1-NEXT: idivl %ecx
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: boolvec_sdiv:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX2-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX2-NEXT: cltd
+; AVX2-NEXT: idivl %ecx
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm1, %esi
+; AVX2-NEXT: cltd
+; AVX2-NEXT: idivl %esi
+; AVX2-NEXT: vmovd %eax, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX2-NEXT: cltd
+; AVX2-NEXT: idivl %ecx
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX2-NEXT: cltd
+; AVX2-NEXT: idivl %ecx
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: boolvec_sdiv:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3
+; AVX512F-NEXT: kshiftrw $3, %k3, %k0
+; AVX512F-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4
+; AVX512F-NEXT: kshiftrw $3, %k4, %k1
+; AVX512F-NEXT: kshiftrw $2, %k3, %k2
+; AVX512F-NEXT: kshiftrw $2, %k4, %k5
+; AVX512F-NEXT: kmovw %k5, %ecx
+; AVX512F-NEXT: kshiftrw $1, %k3, %k5
+; AVX512F-NEXT: kmovw %k3, %edi
+; AVX512F-NEXT: kshiftrw $1, %k4, %k3
+; AVX512F-NEXT: kmovw %k4, %esi
+; AVX512F-NEXT: kmovw %k5, %edx
+; AVX512F-NEXT: kmovw %k3, %eax
+; AVX512F-NEXT: andb $1, %al
+; AVX512F-NEXT: negb %al
+; AVX512F-NEXT: # kill: def $al killed $al killed $eax
+; AVX512F-NEXT: cbtw
+; AVX512F-NEXT: andb $1, %dl
+; AVX512F-NEXT: negb %dl
+; AVX512F-NEXT: idivb %dl
+; AVX512F-NEXT: movl %eax, %edx
+; AVX512F-NEXT: andb $1, %sil
+; AVX512F-NEXT: negb %sil
+; AVX512F-NEXT: movl %esi, %eax
+; AVX512F-NEXT: cbtw
+; AVX512F-NEXT: andb $1, %dil
+; AVX512F-NEXT: negb %dil
+; AVX512F-NEXT: idivb %dil
+; AVX512F-NEXT: movl %eax, %esi
+; AVX512F-NEXT: andb $1, %cl
+; AVX512F-NEXT: negb %cl
+; AVX512F-NEXT: movl %ecx, %eax
+; AVX512F-NEXT: cbtw
+; AVX512F-NEXT: kmovw %k2, %ecx
+; AVX512F-NEXT: andb $1, %cl
+; AVX512F-NEXT: negb %cl
+; AVX512F-NEXT: idivb %cl
+; AVX512F-NEXT: movl %eax, %ecx
+; AVX512F-NEXT: kmovw %k1, %eax
+; AVX512F-NEXT: andb $1, %al
+; AVX512F-NEXT: negb %al
+; AVX512F-NEXT: # kill: def $al killed $al killed $eax
+; AVX512F-NEXT: cbtw
+; AVX512F-NEXT: kmovw %k0, %edi
+; AVX512F-NEXT: andb $1, %dil
+; AVX512F-NEXT: negb %dil
+; AVX512F-NEXT: idivb %dil
+; AVX512F-NEXT: # kill: def $al killed $al def $eax
+; AVX512F-NEXT: kmovw %edx, %k0
+; AVX512F-NEXT: kmovw %esi, %k1
+; AVX512F-NEXT: kshiftrw $1, %k1, %k2
+; AVX512F-NEXT: kxorw %k0, %k2, %k0
+; AVX512F-NEXT: kshiftlw $15, %k0, %k0
+; AVX512F-NEXT: kshiftrw $14, %k0, %k0
+; AVX512F-NEXT: kxorw %k0, %k1, %k0
+; AVX512F-NEXT: kshiftrw $2, %k0, %k1
+; AVX512F-NEXT: kmovw %ecx, %k2
+; AVX512F-NEXT: kxorw %k2, %k1, %k1
+; AVX512F-NEXT: kshiftlw $15, %k1, %k1
+; AVX512F-NEXT: kshiftrw $13, %k1, %k1
+; AVX512F-NEXT: kxorw %k1, %k0, %k0
+; AVX512F-NEXT: kshiftlw $13, %k0, %k0
+; AVX512F-NEXT: kshiftrw $13, %k0, %k0
+; AVX512F-NEXT: kmovw %eax, %k1
+; AVX512F-NEXT: kshiftlw $3, %k1, %k1
+; AVX512F-NEXT: korw %k1, %k0, %k1
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512F-NEXT: vzeroupper
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: boolvec_sdiv:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX512BW-NEXT: vptestmd %xmm1, %xmm1, %k3
+; AVX512BW-NEXT: kshiftrw $3, %k3, %k0
+; AVX512BW-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX512BW-NEXT: vptestmd %xmm0, %xmm0, %k4
+; AVX512BW-NEXT: kshiftrw $3, %k4, %k1
+; AVX512BW-NEXT: kshiftrw $2, %k3, %k2
+; AVX512BW-NEXT: kshiftrw $2, %k4, %k5
+; AVX512BW-NEXT: kmovd %k5, %ecx
+; AVX512BW-NEXT: kshiftrw $1, %k3, %k5
+; AVX512BW-NEXT: kmovd %k3, %edi
+; AVX512BW-NEXT: kshiftrw $1, %k4, %k3
+; AVX512BW-NEXT: kmovd %k4, %esi
+; AVX512BW-NEXT: kmovd %k5, %edx
+; AVX512BW-NEXT: kmovd %k3, %eax
+; AVX512BW-NEXT: andb $1, %al
+; AVX512BW-NEXT: negb %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: cbtw
+; AVX512BW-NEXT: andb $1, %dl
+; AVX512BW-NEXT: negb %dl
+; AVX512BW-NEXT: idivb %dl
+; AVX512BW-NEXT: movl %eax, %edx
+; AVX512BW-NEXT: andb $1, %sil
+; AVX512BW-NEXT: negb %sil
+; AVX512BW-NEXT: movl %esi, %eax
+; AVX512BW-NEXT: cbtw
+; AVX512BW-NEXT: andb $1, %dil
+; AVX512BW-NEXT: negb %dil
+; AVX512BW-NEXT: idivb %dil
+; AVX512BW-NEXT: movl %eax, %esi
+; AVX512BW-NEXT: andb $1, %cl
+; AVX512BW-NEXT: negb %cl
+; AVX512BW-NEXT: movl %ecx, %eax
+; AVX512BW-NEXT: cbtw
+; AVX512BW-NEXT: kmovd %k2, %ecx
+; AVX512BW-NEXT: andb $1, %cl
+; AVX512BW-NEXT: negb %cl
+; AVX512BW-NEXT: idivb %cl
+; AVX512BW-NEXT: movl %eax, %ecx
+; AVX512BW-NEXT: kmovd %k1, %eax
+; AVX512BW-NEXT: andb $1, %al
+; AVX512BW-NEXT: negb %al
+; AVX512BW-NEXT: # kill: def $al killed $al killed $eax
+; AVX512BW-NEXT: cbtw
+; AVX512BW-NEXT: kmovd %k0, %edi
+; AVX512BW-NEXT: andb $1, %dil
+; AVX512BW-NEXT: negb %dil
+; AVX512BW-NEXT: idivb %dil
+; AVX512BW-NEXT: # kill: def $al killed $al def $eax
+; AVX512BW-NEXT: kmovd %edx, %k0
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: kshiftrw $1, %k1, %k2
+; AVX512BW-NEXT: kxorw %k0, %k2, %k0
+; AVX512BW-NEXT: kshiftlw $15, %k0, %k0
+; AVX512BW-NEXT: kshiftrw $14, %k0, %k0
+; AVX512BW-NEXT: kxorw %k0, %k1, %k0
+; AVX512BW-NEXT: kshiftrw $2, %k0, %k1
+; AVX512BW-NEXT: kmovd %ecx, %k2
+; AVX512BW-NEXT: kxorw %k2, %k1, %k1
+; AVX512BW-NEXT: kshiftlw $15, %k1, %k1
+; AVX512BW-NEXT: kshiftrw $13, %k1, %k1
+; AVX512BW-NEXT: kxorw %k1, %k0, %k0
+; AVX512BW-NEXT: kshiftlw $13, %k0, %k0
+; AVX512BW-NEXT: kshiftrw $13, %k0, %k0
+; AVX512BW-NEXT: kmovd %eax, %k1
+; AVX512BW-NEXT: kshiftlw $3, %k1, %k1
+; AVX512BW-NEXT: korw %k1, %k0, %k1
+; AVX512BW-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; AVX512BW-NEXT: retq
+;
+; XOP-LABEL: boolvec_sdiv:
+; XOP: # %bb.0:
+; XOP-NEXT: vpslld $31, %xmm1, %xmm1
+; XOP-NEXT: vpsrad $31, %xmm1, %xmm1
+; XOP-NEXT: vpslld $31, %xmm0, %xmm0
+; XOP-NEXT: vpsrad $31, %xmm0, %xmm0
+; XOP-NEXT: vpextrd $1, %xmm0, %eax
+; XOP-NEXT: vpextrd $1, %xmm1, %ecx
+; XOP-NEXT: cltd
+; XOP-NEXT: idivl %ecx
+; XOP-NEXT: movl %eax, %ecx
+; XOP-NEXT: vmovd %xmm0, %eax
+; XOP-NEXT: vmovd %xmm1, %esi
+; XOP-NEXT: cltd
+; XOP-NEXT: idivl %esi
+; XOP-NEXT: vmovd %eax, %xmm2
+; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; XOP-NEXT: vpextrd $2, %xmm0, %eax
+; XOP-NEXT: vpextrd $2, %xmm1, %ecx
+; XOP-NEXT: cltd
+; XOP-NEXT: idivl %ecx
+; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; XOP-NEXT: vpextrd $3, %xmm0, %eax
+; XOP-NEXT: vpextrd $3, %xmm1, %ecx
+; XOP-NEXT: cltd
+; XOP-NEXT: idivl %ecx
+; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; XOP-NEXT: retq
+ %r = sdiv <4 x i1> %x, %y
+ ret <4 x i1> %r
+}
Modified: llvm/trunk/test/CodeGen/X86/combine-srem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-srem.ll?rev=343392&r1=343391&r2=343392&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-srem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-srem.ll Sat Sep 29 14:00:37 2018
@@ -458,3 +458,83 @@ define i32 @ossfuzz6883() {
%B6 = and i32 %B16, %B10
ret i32 %B6
}
+
+define i1 @bool_srem(i1 %x, i1 %y) {
+; CHECK-LABEL: bool_srem:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: andb $1, %al
+; CHECK-NEXT: negb %al
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: cbtw
+; CHECK-NEXT: andb $1, %sil
+; CHECK-NEXT: negb %sil
+; CHECK-NEXT: idivb %sil
+; CHECK-NEXT: movsbl %ah, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %r = srem i1 %x, %y
+ ret i1 %r
+}
+define <4 x i1> @boolvec_srem(<4 x i1> %x, <4 x i1> %y) {
+; SSE-LABEL: boolvec_srem:
+; SSE: # %bb.0:
+; SSE-NEXT: pslld $31, %xmm1
+; SSE-NEXT: psrad $31, %xmm1
+; SSE-NEXT: pslld $31, %xmm0
+; SSE-NEXT: psrad $31, %xmm0
+; SSE-NEXT: pextrd $1, %xmm0, %eax
+; SSE-NEXT: pextrd $1, %xmm1, %ecx
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %ecx
+; SSE-NEXT: movl %edx, %ecx
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movd %xmm1, %esi
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %esi
+; SSE-NEXT: movd %edx, %xmm2
+; SSE-NEXT: pinsrd $1, %ecx, %xmm2
+; SSE-NEXT: pextrd $2, %xmm0, %eax
+; SSE-NEXT: pextrd $2, %xmm1, %ecx
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %ecx
+; SSE-NEXT: pinsrd $2, %edx, %xmm2
+; SSE-NEXT: pextrd $3, %xmm0, %eax
+; SSE-NEXT: pextrd $3, %xmm1, %ecx
+; SSE-NEXT: cltd
+; SSE-NEXT: idivl %ecx
+; SSE-NEXT: pinsrd $3, %edx, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: boolvec_srem:
+; AVX: # %bb.0:
+; AVX-NEXT: vpslld $31, %xmm1, %xmm1
+; AVX-NEXT: vpsrad $31, %xmm1, %xmm1
+; AVX-NEXT: vpslld $31, %xmm0, %xmm0
+; AVX-NEXT: vpsrad $31, %xmm0, %xmm0
+; AVX-NEXT: vpextrd $1, %xmm0, %eax
+; AVX-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %ecx
+; AVX-NEXT: movl %edx, %ecx
+; AVX-NEXT: vmovd %xmm0, %eax
+; AVX-NEXT: vmovd %xmm1, %esi
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %esi
+; AVX-NEXT: vmovd %edx, %xmm2
+; AVX-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX-NEXT: vpextrd $2, %xmm0, %eax
+; AVX-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %ecx
+; AVX-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
+; AVX-NEXT: vpextrd $3, %xmm0, %eax
+; AVX-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX-NEXT: cltd
+; AVX-NEXT: idivl %ecx
+; AVX-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
+; AVX-NEXT: retq
+ %r = srem <4 x i1> %x, %y
+ ret <4 x i1> %r
+}
Modified: llvm/trunk/test/CodeGen/X86/combine-udiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-udiv.ll?rev=343392&r1=343391&r2=343392&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-udiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-udiv.ll Sat Sep 29 14:00:37 2018
@@ -907,3 +907,170 @@ define <8 x i16> @pr38477(<8 x i16> %a0)
%1 = udiv <8 x i16> %a0, <i16 1, i16 119, i16 73, i16 -111, i16 -3, i16 118, i16 32, i16 31>
ret <8 x i16> %1
}
+
+define i1 @bool_udiv(i1 %x, i1 %y) {
+; CHECK-LABEL: bool_udiv:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $1, %sil
+; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax def $ax
+; CHECK-NEXT: divb %sil
+; CHECK-NEXT: retq
+ %r = udiv i1 %x, %y
+ ret i1 %r
+}
+
+define <4 x i1> @boolvec_udiv(<4 x i1> %x, <4 x i1> %y) {
+; SSE2-LABEL: boolvec_udiv:
+; SSE2: # %bb.0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; SSE2-NEXT: pand %xmm2, %xmm1
+; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
+; SSE2-NEXT: movd %xmm2, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
+; SSE2-NEXT: movd %xmm2, %ecx
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: divl %ecx
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
+; SSE2-NEXT: movd %xmm3, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
+; SSE2-NEXT: movd %xmm3, %ecx
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: divl %ecx
+; SSE2-NEXT: movd %eax, %xmm3
+; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: movd %xmm1, %ecx
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: divl %ecx
+; SSE2-NEXT: movd %eax, %xmm2
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %eax
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
+; SSE2-NEXT: movd %xmm0, %ecx
+; SSE2-NEXT: xorl %edx, %edx
+; SSE2-NEXT: divl %ecx
+; SSE2-NEXT: movd %eax, %xmm0
+; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
+; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE2-NEXT: movdqa %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: boolvec_udiv:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; SSE41-NEXT: pand %xmm2, %xmm1
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: pextrd $1, %xmm0, %eax
+; SSE41-NEXT: pextrd $1, %xmm1, %ecx
+; SSE41-NEXT: xorl %edx, %edx
+; SSE41-NEXT: divl %ecx
+; SSE41-NEXT: movl %eax, %ecx
+; SSE41-NEXT: movd %xmm0, %eax
+; SSE41-NEXT: movd %xmm1, %esi
+; SSE41-NEXT: xorl %edx, %edx
+; SSE41-NEXT: divl %esi
+; SSE41-NEXT: movd %eax, %xmm2
+; SSE41-NEXT: pinsrd $1, %ecx, %xmm2
+; SSE41-NEXT: pextrd $2, %xmm0, %eax
+; SSE41-NEXT: pextrd $2, %xmm1, %ecx
+; SSE41-NEXT: xorl %edx, %edx
+; SSE41-NEXT: divl %ecx
+; SSE41-NEXT: pinsrd $2, %eax, %xmm2
+; SSE41-NEXT: pextrd $3, %xmm0, %eax
+; SSE41-NEXT: pextrd $3, %xmm1, %ecx
+; SSE41-NEXT: xorl %edx, %edx
+; SSE41-NEXT: divl %ecx
+; SSE41-NEXT: pinsrd $3, %eax, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX1-LABEL: boolvec_udiv:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %ecx
+; AVX1-NEXT: movl %eax, %ecx
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm1, %esi
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %esi
+; AVX1-NEXT: vmovd %eax, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %ecx
+; AVX1-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %eax
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %ecx
+; AVX1-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: boolvec_udiv:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %ecx
+; AVX2-NEXT: movl %eax, %ecx
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm1, %esi
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %esi
+; AVX2-NEXT: vmovd %eax, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %ecx
+; AVX2-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %ecx
+; AVX2-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; AVX2-NEXT: retq
+;
+; XOP-LABEL: boolvec_udiv:
+; XOP: # %bb.0:
+; XOP-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
+; XOP-NEXT: vpand %xmm2, %xmm1, %xmm1
+; XOP-NEXT: vpand %xmm2, %xmm0, %xmm0
+; XOP-NEXT: vpextrd $1, %xmm0, %eax
+; XOP-NEXT: vpextrd $1, %xmm1, %ecx
+; XOP-NEXT: xorl %edx, %edx
+; XOP-NEXT: divl %ecx
+; XOP-NEXT: movl %eax, %ecx
+; XOP-NEXT: vmovd %xmm0, %eax
+; XOP-NEXT: vmovd %xmm1, %esi
+; XOP-NEXT: xorl %edx, %edx
+; XOP-NEXT: divl %esi
+; XOP-NEXT: vmovd %eax, %xmm2
+; XOP-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; XOP-NEXT: vpextrd $2, %xmm0, %eax
+; XOP-NEXT: vpextrd $2, %xmm1, %ecx
+; XOP-NEXT: xorl %edx, %edx
+; XOP-NEXT: divl %ecx
+; XOP-NEXT: vpinsrd $2, %eax, %xmm2, %xmm2
+; XOP-NEXT: vpextrd $3, %xmm0, %eax
+; XOP-NEXT: vpextrd $3, %xmm1, %ecx
+; XOP-NEXT: xorl %edx, %edx
+; XOP-NEXT: divl %ecx
+; XOP-NEXT: vpinsrd $3, %eax, %xmm2, %xmm0
+; XOP-NEXT: retq
+ %r = udiv <4 x i1> %x, %y
+ ret <4 x i1> %r
+}
Modified: llvm/trunk/test/CodeGen/X86/combine-urem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-urem.ll?rev=343392&r1=343391&r2=343392&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-urem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-urem.ll Sat Sep 29 14:00:37 2018
@@ -379,3 +379,107 @@ define <4 x i32> @combine_vec_urem_by_sh
%2 = urem <4 x i32> %x, %1
ret <4 x i32> %2
}
+
+define i1 @bool_urem(i1 %x, i1 %y) {
+; CHECK-LABEL: bool_urem:
+; CHECK: # %bb.0:
+; CHECK-NEXT: andb $1, %sil
+; CHECK-NEXT: andb $1, %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: # kill: def $eax killed $eax def $ax
+; CHECK-NEXT: divb %sil
+; CHECK-NEXT: movzbl %ah, %eax
+; CHECK-NEXT: # kill: def $al killed $al killed $eax
+; CHECK-NEXT: retq
+ %r = urem i1 %x, %y
+ ret i1 %r
+}
+
+define <4 x i1> @boolvec_urem(<4 x i1> %x, <4 x i1> %y) {
+; SSE-LABEL: boolvec_urem:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: pextrd $1, %xmm0, %eax
+; SSE-NEXT: pextrd $1, %xmm1, %ecx
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: divl %ecx
+; SSE-NEXT: movl %edx, %ecx
+; SSE-NEXT: movd %xmm0, %eax
+; SSE-NEXT: movd %xmm1, %esi
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: divl %esi
+; SSE-NEXT: movd %edx, %xmm2
+; SSE-NEXT: pinsrd $1, %ecx, %xmm2
+; SSE-NEXT: pextrd $2, %xmm0, %eax
+; SSE-NEXT: pextrd $2, %xmm1, %ecx
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: divl %ecx
+; SSE-NEXT: pinsrd $2, %edx, %xmm2
+; SSE-NEXT: pextrd $3, %xmm0, %eax
+; SSE-NEXT: pextrd $3, %xmm1, %ecx
+; SSE-NEXT: xorl %edx, %edx
+; SSE-NEXT: divl %ecx
+; SSE-NEXT: pinsrd $3, %edx, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: boolvec_urem:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpextrd $1, %xmm0, %eax
+; AVX1-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %ecx
+; AVX1-NEXT: movl %edx, %ecx
+; AVX1-NEXT: vmovd %xmm0, %eax
+; AVX1-NEXT: vmovd %xmm1, %esi
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %esi
+; AVX1-NEXT: vmovd %edx, %xmm2
+; AVX1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $2, %xmm0, %eax
+; AVX1-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %ecx
+; AVX1-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
+; AVX1-NEXT: vpextrd $3, %xmm0, %eax
+; AVX1-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX1-NEXT: xorl %edx, %edx
+; AVX1-NEXT: divl %ecx
+; AVX1-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: boolvec_urem:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
+; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpextrd $1, %xmm0, %eax
+; AVX2-NEXT: vpextrd $1, %xmm1, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %ecx
+; AVX2-NEXT: movl %edx, %ecx
+; AVX2-NEXT: vmovd %xmm0, %eax
+; AVX2-NEXT: vmovd %xmm1, %esi
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %esi
+; AVX2-NEXT: vmovd %edx, %xmm2
+; AVX2-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $2, %xmm0, %eax
+; AVX2-NEXT: vpextrd $2, %xmm1, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %ecx
+; AVX2-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2
+; AVX2-NEXT: vpextrd $3, %xmm0, %eax
+; AVX2-NEXT: vpextrd $3, %xmm1, %ecx
+; AVX2-NEXT: xorl %edx, %edx
+; AVX2-NEXT: divl %ecx
+; AVX2-NEXT: vpinsrd $3, %edx, %xmm2, %xmm0
+; AVX2-NEXT: retq
+ %r = urem <4 x i1> %x, %y
+ ret <4 x i1> %r
+}