[llvm] r367294 - [NFC][X86][AArch64] Revisit test coverage for X s% C == 0 fold - add tests for negative divisors, INT_MIN divisors
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 30 01:00:50 PDT 2019
Author: lebedevri
Date: Tue Jul 30 01:00:49 2019
New Revision: 367294
URL: http://llvm.org/viewvc/llvm-project?rev=367294&view=rev
Log:
[NFC][X86][AArch64] Revisit test coverage for X s% C == 0 fold - add tests for negative divisors, INT_MIN divisors
As discussed in the review, that fold is only valid for positive
divisors, so while we can negate negative divisors,
we have to special-case INT_MIN.
Modified:
llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
llvm/trunk/test/CodeGen/AArch64/srem-seteq.ll
llvm/trunk/test/CodeGen/AArch64/urem-seteq-optsize.ll
llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
llvm/trunk/test/CodeGen/AArch64/urem-seteq.ll
llvm/trunk/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
llvm/trunk/test/CodeGen/X86/srem-seteq-vec-splat.ll
llvm/trunk/test/CodeGen/X86/srem-seteq.ll
llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
llvm/trunk/test/CodeGen/X86/urem-seteq-vec-splat.ll
llvm/trunk/test/CodeGen/X86/urem-seteq.ll
Modified: llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll Tue Jul 30 01:00:49 2019
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
-; At the moment, BuildSREMEqFold does not handle nonsplat vectors.
-
; Odd+Even divisors
define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_srem_odd_even:
@@ -425,115 +423,103 @@ define <4 x i32> @test_srem_odd_even_one
ret <4 x i32> %ret
}
-;==============================================================================;
+;------------------------------------------------------------------------------;
-; One all-ones divisor and power-of-two divisor divisor in odd divisor
-define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_allones_and_poweroftwo:
+; One INT_MIN divisor in odd divisor
+define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_INT_MIN:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI13_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT: adrp x8, .LCPI13_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_1]
; CHECK-NEXT: adrp x8, .LCPI13_2
+; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI13_2]
; CHECK-NEXT: adrp x8, .LCPI13_3
-; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
-; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI13_3]
-; CHECK-NEXT: adrp x8, .LCPI13_4
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_4]
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
-; CHECK-NEXT: ushr v1.4s, v1.4s, #31
-; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
-; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
-; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: usra v3.4s, v1.4s, #31
+; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
+ %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and power-of-two divisor divisor in even divisor
-define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_even_allones_and_poweroftwo:
+; One INT_MIN divisor in even divisor
+define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_even_INT_MIN:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
; CHECK-NEXT: adrp x8, .LCPI14_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1]
; CHECK-NEXT: adrp x8, .LCPI14_2
+; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_2]
; CHECK-NEXT: adrp x8, .LCPI14_3
-; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
-; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_3]
-; CHECK-NEXT: adrp x8, .LCPI14_4
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_4]
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
-; CHECK-NEXT: ushr v1.4s, v1.4s, #31
-; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
-; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
-; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: usra v3.4s, v1.4s, #31
+; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
+ %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and power-of-two divisor divisor in odd+even divisor
-define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_even_allones_and_poweroftwo:
+; One INT_MIN divisor in odd+even divisor
+define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_even_INT_MIN:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: adrp x8, .LCPI15_2
+; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_2]
; CHECK-NEXT: adrp x8, .LCPI15_3
-; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
-; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_3]
-; CHECK-NEXT: adrp x8, .LCPI15_4
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_4]
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
-; CHECK-NEXT: ushr v1.4s, v1.4s, #31
-; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
-; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
-; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: usra v3.4s, v1.4s, #31
+; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
+ %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-;------------------------------------------------------------------------------;
+;==============================================================================;
-; One all-ones divisor and one one divisor in odd divisor
-define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_allones_and_one:
+; One all-ones divisor and power-of-two divisor divisor in odd divisor
+define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
@@ -559,15 +545,15 @@ define <4 x i32> @test_srem_odd_allones_
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
+ %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and one one divisor in even divisor
-define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_even_allones_and_one:
+; One all-ones divisor and power-of-two divisor divisor in even divisor
+define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_even_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
@@ -593,15 +579,15 @@ define <4 x i32> @test_srem_even_allones
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
+ %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and one one divisor in odd+even divisor
-define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_even_allones_and_one:
+; One all-ones divisor and power-of-two divisor divisor in odd+even divisor
+define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_even_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
@@ -627,7 +613,7 @@ define <4 x i32> @test_srem_odd_even_all
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
+ %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
@@ -635,9 +621,9 @@ define <4 x i32> @test_srem_odd_even_all
;------------------------------------------------------------------------------;
-; One power-of-two divisor divisor and one divisor in odd divisor
-define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_poweroftwo_and_one:
+; One all-ones divisor and one one divisor in odd divisor
+define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_allones_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI19_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
@@ -663,15 +649,15 @@ define <4 x i32> @test_srem_odd_poweroft
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
+ %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One power-of-two divisor divisor and one divisor in even divisor
-define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_even_poweroftwo_and_one:
+; One all-ones divisor and one one divisor in even divisor
+define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_even_allones_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
@@ -679,31 +665,33 @@ define <4 x i32> @test_srem_even_powerof
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_1]
; CHECK-NEXT: adrp x8, .LCPI20_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI20_2]
+; CHECK-NEXT: adrp x8, .LCPI20_3
; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT: adrp x8, .LCPI20_3
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI20_3]
-; CHECK-NEXT: neg v2.4s, v2.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v0.4s
-; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI20_4
+; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_4]
+; CHECK-NEXT: neg v3.4s, v3.4s
+; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
; CHECK-NEXT: ushr v1.4s, v1.4s, #31
-; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
-; CHECK-NEXT: mls v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
+ %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One power-of-two divisor divisor and one divisor in odd+even divisor
-define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_even_poweroftwo_and_one:
+; One all-ones divisor and one one divisor in odd+even divisor
+define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_even_allones_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI21_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
@@ -729,7 +717,7 @@ define <4 x i32> @test_srem_odd_even_pow
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
+ %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
@@ -737,21 +725,123 @@ define <4 x i32> @test_srem_odd_even_pow
;------------------------------------------------------------------------------;
-define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
+; One power-of-two divisor divisor and one divisor in odd divisor
+define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_poweroftwo_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI22_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
; CHECK-NEXT: adrp x8, .LCPI22_1
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_1]
; CHECK-NEXT: adrp x8, .LCPI22_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI22_2]
+; CHECK-NEXT: adrp x8, .LCPI22_3
; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_2]
-; CHECK-NEXT: adrp x8, .LCPI22_3
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI22_3]
+; CHECK-NEXT: adrp x8, .LCPI22_4
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_3]
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_4]
+; CHECK-NEXT: neg v3.4s, v3.4s
+; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #31
+; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One power-of-two divisor divisor and one divisor in even divisor
+define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_even_poweroftwo_and_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI23_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT: adrp x8, .LCPI23_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_1]
+; CHECK-NEXT: adrp x8, .LCPI23_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI23_2]
+; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: adrp x8, .LCPI23_3
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI23_3]
+; CHECK-NEXT: neg v2.4s, v2.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v0.4s
+; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #31
+; CHECK-NEXT: and v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: add v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One power-of-two divisor divisor and one divisor in odd+even divisor
+define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_even_poweroftwo_and_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI24_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
+; CHECK-NEXT: adrp x8, .LCPI24_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_1]
+; CHECK-NEXT: adrp x8, .LCPI24_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI24_2]
+; CHECK-NEXT: adrp x8, .LCPI24_3
+; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI24_3]
+; CHECK-NEXT: adrp x8, .LCPI24_4
+; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_4]
+; CHECK-NEXT: neg v3.4s, v3.4s
+; CHECK-NEXT: sshl v3.4s, v1.4s, v3.4s
+; CHECK-NEXT: ushr v1.4s, v1.4s, #31
+; CHECK-NEXT: and v1.16b, v1.16b, v4.16b
+; CHECK-NEXT: add v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+;------------------------------------------------------------------------------;
+
+define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI25_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0]
+; CHECK-NEXT: adrp x8, .LCPI25_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_1]
+; CHECK-NEXT: adrp x8, .LCPI25_2
+; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI25_2]
+; CHECK-NEXT: adrp x8, .LCPI25_3
+; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_3]
; CHECK-NEXT: neg v4.4s, v4.4s
; CHECK-NEXT: movi v3.2d, #0x000000ffffffff
; CHECK-NEXT: sshl v4.4s, v1.4s, v4.4s
@@ -772,18 +862,18 @@ define <4 x i32> @test_srem_odd_allones_
define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI23_0
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: adrp x8, .LCPI23_1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_1]
-; CHECK-NEXT: adrp x8, .LCPI23_2
+; CHECK-NEXT: adrp x8, .LCPI26_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0]
+; CHECK-NEXT: adrp x8, .LCPI26_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
+; CHECK-NEXT: adrp x8, .LCPI26_2
; CHECK-NEXT: smull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI23_2]
-; CHECK-NEXT: adrp x8, .LCPI23_3
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI26_2]
+; CHECK-NEXT: adrp x8, .LCPI26_3
; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3]
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3]
; CHECK-NEXT: neg v4.4s, v4.4s
; CHECK-NEXT: movi v3.2d, #0x000000ffffffff
; CHECK-NEXT: sshl v4.4s, v1.4s, v4.4s
Modified: llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-splat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-splat.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/srem-seteq-vec-splat.ll Tue Jul 30 01:00:49 2019
@@ -49,6 +49,56 @@ define <4 x i32> @test_srem_even_100(<4
ret <4 x i32> %ret
}
+; Negative divisors should be negated, and thus this is still splat vectors.
+
+; Odd divisor
+define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_odd_neg25:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI2_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT: adrp x8, .LCPI2_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
+; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: sshr v3.4s, v1.4s, #3
+; CHECK-NEXT: usra v3.4s, v1.4s, #31
+; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Even divisors
+define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_even_neg100:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: adrp x8, .LCPI3_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
+; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: sshr v3.4s, v1.4s, #5
+; CHECK-NEXT: usra v3.4s, v1.4s, #31
+; CHECK-NEXT: mls v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;
@@ -103,6 +153,27 @@ define <4 x i32> @test_srem_even_undef1(
; Negative tests
;------------------------------------------------------------------------------;
+define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_one_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_one_ne:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_srem_pow2:
@@ -122,36 +193,34 @@ define <4 x i32> @test_srem_pow2(<4 x i3
ret <4 x i32> %ret
}
-; We could lower remainder of division by all-ones much better elsewhere.
-define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_allones:
+; We could lower remainder of division by INT_MIN much better elsewhere.
+define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_int_min:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: sshr v1.4s, v0.4s, #31
+; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: movi v3.4s, #128, lsl #24
+; CHECK-NEXT: usra v2.4s, v1.4s, #1
+; CHECK-NEXT: and v1.16b, v2.16b, v3.16b
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
+ %srem = srem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; If all divisors are ones, this is constant-folded.
-define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_one_eq:
+; We could lower remainder of division by all-ones much better elsewhere.
+define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_srem_allones:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4s, #1
; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_srem_one_ne:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
- %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
- %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
- %ret = zext <4 x i1> %cmp to <4 x i32>
- ret <4 x i32> %ret
-}
Modified: llvm/trunk/test/CodeGen/AArch64/srem-seteq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/srem-seteq.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/srem-seteq.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/srem-seteq.ll Tue Jul 30 01:00:49 2019
@@ -204,11 +204,50 @@ define i32 @test_srem_odd_setne(i32 %X)
ret i32 %ret
}
+; The fold is only valid for positive divisors, negative-ones should be negated.
+define i32 @test_srem_negative_odd(i32 %X) nounwind {
+; CHECK-LABEL: test_srem_negative_odd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-1717986919
+; CHECK-NEXT: smull x8, w0, w8
+; CHECK-NEXT: lsr x9, x8, #63
+; CHECK-NEXT: asr x8, x8, #33
+; CHECK-NEXT: add w8, w8, w9
+; CHECK-NEXT: add w8, w8, w8, lsl #2
+; CHECK-NEXT: cmn w0, w8
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %srem = srem i32 %X, -5
+ %cmp = icmp ne i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+define i32 @test_srem_negative_even(i32 %X) nounwind {
+; CHECK-LABEL: test_srem_negative_even:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #56173
+; CHECK-NEXT: movk w8, #28086, lsl #16
+; CHECK-NEXT: smull x8, w0, w8
+; CHECK-NEXT: lsr x8, x8, #32
+; CHECK-NEXT: sub w8, w8, w0
+; CHECK-NEXT: asr w9, w8, #3
+; CHECK-NEXT: add w8, w9, w8, lsr #31
+; CHECK-NEXT: mov w9, #-14
+; CHECK-NEXT: msub w8, w8, w9, w0
+; CHECK-NEXT: cmp w8, #0 // =0
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %srem = srem i32 %X, -14
+ %cmp = icmp ne i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;
-; The fold is invalid if divisor is 1.
+; We can lower remainder of division by one much better elsewhere.
define i32 @test_srem_one(i32 %X) nounwind {
; CHECK-LABEL: test_srem_one:
; CHECK: // %bb.0:
@@ -220,33 +259,51 @@ define i32 @test_srem_one(i32 %X) nounwi
ret i32 %ret
}
-; We can lower remainder of division by all-ones much better elsewhere.
-define i32 @test_srem_allones(i32 %X) nounwind {
-; CHECK-LABEL: test_srem_allones:
+; We can lower remainder of division by powers of two much better elsewhere.
+define i32 @test_srem_pow2(i32 %X) nounwind {
+; CHECK-LABEL: test_srem_pow2:
; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, #15 // =15
; CHECK-NEXT: cmp w0, #0 // =0
-; CHECK-NEXT: csel w8, w0, w0, lt
+; CHECK-NEXT: csel w8, w8, w0, lt
+; CHECK-NEXT: and w8, w8, #0xfffffff0
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
- %srem = srem i32 %X, 4294967295
+ %srem = srem i32 %X, 16
%cmp = icmp eq i32 %srem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
}
-; We can lower remainder of division by powers of two much better elsewhere.
-define i32 @test_srem_pow2(i32 %X) nounwind {
-; CHECK-LABEL: test_srem_pow2:
+; The fold is only valid for positive divisors, and we can't negate INT_MIN.
+define i32 @test_srem_int_min(i32 %X) nounwind {
+; CHECK-LABEL: test_srem_int_min:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, #15 // =15
+; CHECK-NEXT: mov w8, #2147483647
+; CHECK-NEXT: add w8, w0, w8
; CHECK-NEXT: cmp w0, #0 // =0
; CHECK-NEXT: csel w8, w8, w0, lt
-; CHECK-NEXT: and w8, w8, #0xfffffff0
+; CHECK-NEXT: and w8, w8, #0x80000000
+; CHECK-NEXT: cmn w0, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %srem = srem i32 %X, 2147483648
+ %cmp = icmp eq i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+; We can lower remainder of division by all-ones much better elsewhere.
+define i32 @test_srem_allones(i32 %X) nounwind {
+; CHECK-LABEL: test_srem_allones:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp w0, #0 // =0
+; CHECK-NEXT: csel w8, w0, w0, lt
; CHECK-NEXT: cmp w0, w8
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
- %srem = srem i32 %X, 16
+ %srem = srem i32 %X, 4294967295
%cmp = icmp eq i32 %srem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
Modified: llvm/trunk/test/CodeGen/AArch64/urem-seteq-optsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/urem-seteq-optsize.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/urem-seteq-optsize.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/urem-seteq-optsize.ll Tue Jul 30 01:00:49 2019
@@ -1,9 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
-; On AArch64, division in expensive. BuildRemEqFold should therefore run even
-; when optimizing for size. Only optimizing for minimum size retains a plain div.
-
define i32 @test_minsize(i32 %X) optsize minsize nounwind readnone {
; CHECK-LABEL: test_minsize:
; CHECK: // %bb.0:
Modified: llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll Tue Jul 30 01:00:49 2019
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
-; At the moment, BuildUREMEqFold does not handle nonsplat vectors.
-
; Odd+Even divisors
define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_odd_even:
@@ -365,11 +363,11 @@ define <4 x i32> @test_urem_odd_even_one
ret <4 x i32> %ret
}
-;==============================================================================;
+;------------------------------------------------------------------------------;
-; One all-ones divisor and power-of-two divisor divisor in odd divisor
-define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo:
+; One INT_MIN divisor in odd divisor
+define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_INT_MIN:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI13_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
@@ -387,15 +385,15 @@ define <4 x i32> @test_urem_odd_allones_
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
+ %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and power-of-two divisor divisor in even divisor
-define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_even_allones_and_poweroftwo:
+; One INT_MIN divisor in even divisor
+define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_even_INT_MIN:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI14_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
@@ -417,15 +415,15 @@ define <4 x i32> @test_urem_even_allones
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
+ %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and power-of-two divisor divisor in odd+even divisor
-define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_even_allones_and_poweroftwo:
+; One INT_MIN divisor in odd+even divisor
+define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_even_INT_MIN:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
@@ -433,46 +431,57 @@ define <4 x i32> @test_urem_odd_even_all
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: adrp x8, .LCPI15_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_2]
-; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
-; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: neg v1.4s, v1.4s
+; CHECK-NEXT: adrp x8, .LCPI15_3
+; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
+; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
+; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3]
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: neg v2.4s, v2.4s
-; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT: neg v3.4s, v3.4s
+; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
+ %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-;------------------------------------------------------------------------------;
+;==============================================================================;
-; One all-ones divisor and one one divisor in odd divisor
-define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_allones_and_one:
+; One all-ones divisor and power-of-two divisor divisor in odd divisor
+define <4 x i32> @test_urem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI16_0
-; CHECK-NEXT: adrp x9, .LCPI16_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1]
-; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: adrp x8, .LCPI16_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_1]
+; CHECK-NEXT: adrp x8, .LCPI16_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI16_2]
+; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: neg v2.4s, v2.4s
+; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
+ %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and one one divisor in even divisor
-define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_even_allones_and_one:
+; One all-ones divisor and power-of-two divisor divisor in even divisor
+define <4 x i32> @test_urem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_even_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
@@ -486,26 +495,23 @@ define <4 x i32> @test_urem_even_allones
; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3]
-; CHECK-NEXT: adrp x8, .LCPI17_4
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_4]
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
-; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
-; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
+ %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One all-ones divisor and one one divisor in odd+even divisor
-define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_even_allones_and_one:
+; One all-ones divisor and power-of-two divisor divisor in odd+even divisor
+define <4 x i32> @test_urem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_even_allones_and_poweroftwo:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI18_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
@@ -513,20 +519,17 @@ define <4 x i32> @test_urem_odd_even_all
; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_1]
; CHECK-NEXT: adrp x8, .LCPI18_2
; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_2]
-; CHECK-NEXT: adrp x8, .LCPI18_3
; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_3]
; CHECK-NEXT: neg v2.4s, v2.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
-; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v3.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
+ %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
@@ -534,38 +537,28 @@ define <4 x i32> @test_urem_odd_even_all
;------------------------------------------------------------------------------;
-; One power-of-two divisor divisor and one divisor in odd divisor
-define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_poweroftwo_and_one:
+; One all-ones divisor and one one divisor in odd divisor
+define <4 x i32> @test_urem_odd_allones_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_allones_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: adrp x9, .LCPI19_1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: adrp x8, .LCPI19_1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_1]
-; CHECK-NEXT: adrp x8, .LCPI19_2
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI19_2]
-; CHECK-NEXT: adrp x8, .LCPI19_3
-; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
-; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
-; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
-; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI19_3]
-; CHECK-NEXT: neg v2.4s, v2.4s
-; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
-; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
-; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI19_1]
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
+ %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One power-of-two divisor divisor and one divisor in even divisor
-define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_even_poweroftwo_and_one:
+; One all-ones divisor and one one divisor in even divisor
+define <4 x i32> @test_urem_even_allones_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_even_allones_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
@@ -590,15 +583,15 @@ define <4 x i32> @test_urem_even_powerof
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
+ %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; One power-of-two divisor divisor and one divisor in odd+even divisor
-define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_even_poweroftwo_and_one:
+; One all-ones divisor and one one divisor in odd+even divisor
+define <4 x i32> @test_urem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_even_allones_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI21_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
@@ -619,7 +612,7 @@ define <4 x i32> @test_urem_odd_even_pow
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
+ %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
@@ -627,8 +620,9 @@ define <4 x i32> @test_urem_odd_even_pow
;------------------------------------------------------------------------------;
-define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
+; One power-of-two divisor divisor and one divisor in odd divisor
+define <4 x i32> @test_urem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_poweroftwo_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI22_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
@@ -649,14 +643,15 @@ define <4 x i32> @test_urem_odd_allones_
; CHECK-NEXT: movi v1.4s, #1
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
+ %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
+; One power-of-two divisor divisor and one divisor in even divisor
+define <4 x i32> @test_urem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_even_poweroftwo_and_one:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI23_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
@@ -676,6 +671,97 @@ define <4 x i32> @test_urem_even_allones
; CHECK-NEXT: neg v3.4s, v3.4s
; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
+; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One power-of-two divisor divisor and one divisor in odd+even divisor
+define <4 x i32> @test_urem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_even_poweroftwo_and_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI24_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0]
+; CHECK-NEXT: adrp x8, .LCPI24_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_1]
+; CHECK-NEXT: adrp x8, .LCPI24_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI24_2]
+; CHECK-NEXT: adrp x8, .LCPI24_3
+; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI24_3]
+; CHECK-NEXT: neg v2.4s, v2.4s
+; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
+; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+;------------------------------------------------------------------------------;
+
+define <4 x i32> @test_urem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_allones_and_poweroftwo_and_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI25_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0]
+; CHECK-NEXT: adrp x8, .LCPI25_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_1]
+; CHECK-NEXT: adrp x8, .LCPI25_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI25_2]
+; CHECK-NEXT: adrp x8, .LCPI25_3
+; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI25_3]
+; CHECK-NEXT: neg v2.4s, v2.4s
+; CHECK-NEXT: ushl v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b
+; CHECK-NEXT: mls v0.4s, v3.4s, v4.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_even_allones_and_poweroftwo_and_one:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI26_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0]
+; CHECK-NEXT: adrp x8, .LCPI26_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_1]
+; CHECK-NEXT: adrp x8, .LCPI26_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI26_2]
+; CHECK-NEXT: neg v1.4s, v1.4s
+; CHECK-NEXT: adrp x8, .LCPI26_3
+; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
+; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
+; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3]
+; CHECK-NEXT: adrp x8, .LCPI26_4
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI26_4]
+; CHECK-NEXT: neg v3.4s, v3.4s
+; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
; CHECK-NEXT: mls v0.4s, v2.4s, v4.4s
; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: movi v1.4s, #1
Modified: llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-splat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-splat.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/urem-seteq-vec-splat.ll Tue Jul 30 01:00:49 2019
@@ -45,6 +45,57 @@ define <4 x i32> @test_urem_even_100(<4
ret <4 x i32> %ret
}
+; Negative divisors should be negated, and thus this is still splat vectors.
+
+; Odd divisor
+define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_odd_neg25:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI2_0
+; CHECK-NEXT: adrp x9, .LCPI2_1
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Even divisors
+define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_even_neg100:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI3_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: adrp x8, .LCPI3_1
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: adrp x8, .LCPI3_2
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_2]
+; CHECK-NEXT: neg v1.4s, v1.4s
+; CHECK-NEXT: adrp x8, .LCPI3_3
+; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
+; CHECK-NEXT: umull2 v4.2d, v1.4s, v2.4s
+; CHECK-NEXT: umull v1.2d, v1.2s, v2.2s
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: neg v3.4s, v3.4s
+; CHECK-NEXT: ushl v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;
@@ -97,6 +148,27 @@ define <4 x i32> @test_urem_even_undef1(
; Negative tests
;------------------------------------------------------------------------------;
+define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_one_eq:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_one_ne:
+; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ret
+ %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_pow2:
@@ -113,39 +185,32 @@ define <4 x i32> @test_urem_pow2(<4 x i3
ret <4 x i32> %ret
}
-; We could lower remainder of division by all-ones much better elsewhere.
-define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_allones:
+; We could lower remainder of division by INT_MIN much better elsewhere.
+define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_int_min:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg v0.4s, v0.4s
+; CHECK-NEXT: bic v0.4s, #128, lsl #24
; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
+ %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; If all divisors are ones, this is constant-folded.
-define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_one_eq:
+; We could lower remainder of division by all-ones much better elsewhere.
+define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
+; CHECK-LABEL: test_urem_allones:
; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.4s, #1
+; CHECK-NEXT: neg v0.4s, v0.4s
+; CHECK-NEXT: movi v1.4s, #1
+; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %urem = urem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
-; CHECK-LABEL: test_urem_one_ne:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ret
- %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
- %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
- %ret = zext <4 x i1> %cmp to <4 x i32>
- ret <4 x i32> %ret
-}
Modified: llvm/trunk/test/CodeGen/AArch64/urem-seteq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/urem-seteq.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/urem-seteq.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/urem-seteq.ll Tue Jul 30 01:00:49 2019
@@ -5,10 +5,6 @@
; Odd divisors
;------------------------------------------------------------------------------;
-; This tests the BuildREMEqFold optimization with UREM, i32, odd divisor, SETEQ.
-; The corresponding pseudocode is:
-; Q <- [N * multInv(5, 2^32)] <=> [N * 0xCCCCCCCD] <=> [N * (-858993459)]
-; res <- [Q <= (2^32 - 1) / 5] <=> [Q <= 858993459] <=> [Q < 858993460]
define i32 @test_urem_odd(i32 %X) nounwind {
; CHECK-LABEL: test_urem_odd:
; CHECK: // %bb.0:
@@ -79,12 +75,6 @@ define i32 @test_urem_odd_bit31(i32 %X)
; Even divisors
;------------------------------------------------------------------------------;
-; This tests the BuildREMEqFold optimization with UREM, i16, even divisor, SETNE.
-; In this case, D <=> 14 <=> 7 * 2^1, so D0 = 7 and K = 1.
-; The corresponding pseudocode is:
-; Q <- [N * multInv(D0, 2^16)] <=> [N * multInv(7, 2^16)] <=> [N * 28087]
-; Q <- [Q >>rot K] <=> [Q >>rot 1]
-; res <- ![Q <= (2^16 - 1) / 7] <=> ![Q <= 9362] <=> [Q > 9362]
define i16 @test_urem_even(i16 %X) nounwind {
; CHECK-LABEL: test_urem_even:
; CHECK: // %bb.0:
@@ -177,11 +167,41 @@ define i32 @test_urem_odd_setne(i32 %X)
ret i32 %ret
}
+; The fold is only valid for positive divisors, negative-ones should be negated.
+define i32 @test_urem_negative_odd(i32 %X) nounwind {
+; CHECK-LABEL: test_urem_negative_odd:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #858993459
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: cmp w8, #1 // =1
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, -5
+ %cmp = icmp ne i32 %urem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+define i32 @test_urem_negative_even(i32 %X) nounwind {
+; CHECK-LABEL: test_urem_negative_even:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #37449
+; CHECK-NEXT: movk w8, #51492, lsl #16
+; CHECK-NEXT: mul w8, w0, w8
+; CHECK-NEXT: ror w8, w8, #1
+; CHECK-NEXT: cmp w8, #1 // =1
+; CHECK-NEXT: cset w0, hi
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, -14
+ %cmp = icmp ne i32 %urem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;
-; The fold is invalid if divisor is 1.
+; We can lower remainder of division by one much better elsewhere.
define i32 @test_urem_one(i32 %X) nounwind {
; CHECK-LABEL: test_urem_one:
; CHECK: // %bb.0:
@@ -193,28 +213,41 @@ define i32 @test_urem_one(i32 %X) nounwi
ret i32 %ret
}
-; We can lower remainder of division by all-ones much better elsewhere.
-define i32 @test_urem_allones(i32 %X) nounwind {
-; CHECK-LABEL: test_urem_allones:
+; We can lower remainder of division by powers of two much better elsewhere.
+define i32 @test_urem_pow2(i32 %X) nounwind {
+; CHECK-LABEL: test_urem_pow2:
; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w0
-; CHECK-NEXT: cmp w8, #2 // =2
-; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: tst w0, #0xf
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
- %urem = urem i32 %X, 4294967295
+ %urem = urem i32 %X, 16
%cmp = icmp eq i32 %urem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
}
-; We can lower remainder of division by powers of two much better elsewhere.
-define i32 @test_urem_pow2(i32 %X) nounwind {
-; CHECK-LABEL: test_urem_pow2:
+; The fold is only valid for positive divisors, and we can't negate INT_MIN.
+define i32 @test_urem_int_min(i32 %X) nounwind {
+; CHECK-LABEL: test_urem_int_min:
; CHECK: // %bb.0:
-; CHECK-NEXT: tst w0, #0xf
+; CHECK-NEXT: tst w0, #0x7fffffff
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
- %urem = urem i32 %X, 16
+ %urem = urem i32 %X, 2147483648
+ %cmp = icmp eq i32 %urem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+; We can lower remainder of division by all-ones much better elsewhere.
+define i32 @test_urem_allones(i32 %X) nounwind {
+; CHECK-LABEL: test_urem_allones:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: cmp w8, #2 // =2
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %urem = urem i32 %X, 4294967295
%cmp = icmp eq i32 %urem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
Modified: llvm/trunk/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll Tue Jul 30 01:00:49 2019
@@ -1899,6 +1899,437 @@ define <4 x i32> @test_srem_odd_even_one
ret <4 x i32> %ret
}
+;------------------------------------------------------------------------------;
+
+; One INT_MIN divisor in odd divisor
+define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_srem_odd_INT_MIN:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1717986919,1717986919,2147483647,1717986919]
+; CHECK-SSE2-NEXT: pand %xmm3, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = <0,u,4294967295,u>
+; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: psrad $1, %xmm3
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
+; CHECK-SSE2-NEXT: psrad $30, %xmm4
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[2,0],xmm3[3,0]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm4[0,2]
+; CHECK-SSE2-NEXT: psrld $31, %xmm2
+; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_srem_odd_INT_MIN:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <1717986919,u,2147483647,u>
+; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,0,4294967295,0]
+; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
+; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE41-NEXT: psrad $30, %xmm2
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3
+; CHECK-SSE41-NEXT: psrad $1, %xmm3
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: psrld $31, %xmm1
+; CHECK-SSE41-NEXT: paddd %xmm3, %xmm1
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_srem_odd_INT_MIN:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_srem_odd_INT_MIN:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1717986919,1717986919,1717986919,1717986919]
+; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_srem_odd_INT_MIN:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1717986919,1717986919,1717986919,1717986919]
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One INT_MIN divisor in even divisor
+define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_srem_even_INT_MIN:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483647,2454267027]
+; CHECK-SSE2-NEXT: pand %xmm3, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295,0,4294967295]
+; CHECK-SSE2-NEXT: pand %xmm0, %xmm4
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
+; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,2454267027,2454267027]
+; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
+; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <1,u,4294967295,u>
+; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm4
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
+; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm2
+; CHECK-SSE2-NEXT: psrad $3, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm3
+; CHECK-SSE2-NEXT: psrad $30, %xmm3
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[3,0]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0,2]
+; CHECK-SSE2-NEXT: psrld $31, %xmm4
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
+; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_srem_even_INT_MIN:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483647,u>
+; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
+; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
+; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE41-NEXT: psrad $30, %xmm2
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3
+; CHECK-SSE41-NEXT: psrad $3, %xmm3
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: psrld $31, %xmm1
+; CHECK-SSE41-NEXT: paddd %xmm3, %xmm1
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_srem_even_INT_MIN:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_srem_even_INT_MIN:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One INT_MIN divisor in odd+even divisor
+define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_srem_odd_even_INT_MIN:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,2147483647,1374389535]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
+; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
+; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
+; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
+; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
+; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
+; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: psrad $5, %xmm3
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
+; CHECK-SSE2-NEXT: psrad $30, %xmm4
+; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: psrad $3, %xmm3
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
+; CHECK-SSE2-NEXT: psrad $1, %xmm5
+; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
+; CHECK-SSE2-NEXT: psrld $31, %xmm2
+; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,2147483648,100]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_srem_odd_even_INT_MIN:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
+; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
+; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
+; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
+; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE41-NEXT: psrad $5, %xmm1
+; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE41-NEXT: psrad $3, %xmm3
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE41-NEXT: psrad $30, %xmm1
+; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4
+; CHECK-SSE41-NEXT: psrad $1, %xmm4
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: psrld $31, %xmm2
+; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm3
+; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_srem_odd_even_INT_MIN:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
;==============================================================================;
; One all-ones divisor and power-of-two divisor in odd divisor
Modified: llvm/trunk/test/CodeGen/X86/srem-seteq-vec-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/srem-seteq-vec-splat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/srem-seteq-vec-splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/srem-seteq-vec-splat.ll Tue Jul 30 01:00:49 2019
@@ -229,6 +229,241 @@ define <4 x i32> @test_srem_even_100(<4
ret <4 x i32> %ret
}
+; Negative divisors should be negated, and thus these still count as splat vectors.
+
+; Odd divisor
+define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_srem_odd_neg25:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,2920577761,2920577761,1374389535]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,2,3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm4
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-SSE2-NEXT: pxor %xmm3, %xmm3
+; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
+; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
+; CHECK-SSE2-NEXT: pand %xmm1, %xmm4
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,4294967295,0]
+; CHECK-SSE2-NEXT: pand %xmm0, %xmm1
+; CHECK-SSE2-NEXT: paddd %xmm4, %xmm1
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $31, %xmm1
+; CHECK-SSE2-NEXT: psrad $3, %xmm2
+; CHECK-SSE2-NEXT: paddd %xmm1, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [25,4294967271,4294967271,25]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_srem_odd_neg25:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,2920577761,2920577761,1374389535]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
+; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE41-NEXT: psrld $31, %xmm2
+; CHECK-SSE41-NEXT: psrad $3, %xmm1
+; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_srem_odd_neg25:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1374389535,2920577761,2920577761,1374389535]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_srem_odd_neg25:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1374389535,2920577761,2920577761,1374389535]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX2-NEXT: vpsrad $3, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_srem_odd_neg25:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [1374389535,2920577761,2920577761,1374389535]
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX512VL-NEXT: vpsrad $3, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Even divisors
+define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_srem_even_neg100:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2920577761,1374389535,2920577761,1374389535]
+; CHECK-SSE2-NEXT: pand %xmm3, %xmm2
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,0,4294967295,0]
+; CHECK-SSE2-NEXT: pand %xmm0, %xmm4
+; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
+; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: psrld $31, %xmm3
+; CHECK-SSE2-NEXT: psrad $5, %xmm2
+; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
+; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_srem_even_neg100:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2920577761,u,2920577761,u>
+; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE41-NEXT: psrld $31, %xmm1
+; CHECK-SSE41-NEXT: psrad $5, %xmm2
+; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_srem_even_neg100:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_srem_even_neg100:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
+; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2920577761,2920577761,2920577761,2920577761]
+; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX2-NEXT: vpsrad $5, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_srem_even_neg100:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2920577761,2920577761,2920577761,2920577761]
+; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
+; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
+; CHECK-AVX512VL-NEXT: vpsrad $5, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;
@@ -459,6 +694,47 @@ define <4 x i32> @test_srem_even_undef1(
; Negative tests
;------------------------------------------------------------------------------;
+define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_srem_one_eq:
+; CHECK-SSE: # %bb.0:
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_srem_one_eq:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_srem_one_eq:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_srem_one_eq:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX512VL-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_srem_one_ne:
+; CHECK-SSE: # %bb.0:
+; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: test_srem_one_ne:
+; CHECK-AVX: # %bb.0:
+; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-AVX-NEXT: retq
+ %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_srem_pow2:
@@ -516,71 +792,86 @@ define <4 x i32> @test_srem_pow2(<4 x i3
ret <4 x i32> %ret
}
-; We could lower remainder of division by all-ones much better elsewhere.
-define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
-; CHECK-SSE-LABEL: test_srem_allones:
+; We could lower remainder of division by INT_MIN much better elsewhere.
+define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_srem_int_min:
; CHECK-SSE: # %bb.0:
-; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
+; CHECK-SSE-NEXT: psrad $31, %xmm1
+; CHECK-SSE-NEXT: psrld $1, %xmm1
+; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
+; CHECK-SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; CHECK-SSE-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE-NEXT: psrld $31, %xmm0
; CHECK-SSE-NEXT: retq
;
-; CHECK-AVX1-LABEL: test_srem_allones:
+; CHECK-AVX1-LABEL: test_srem_int_min:
; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
-; CHECK-AVX2-LABEL: test_srem_allones:
+; CHECK-AVX2-LABEL: test_srem_int_min:
; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
-; CHECK-AVX512VL-LABEL: test_srem_allones:
+; CHECK-AVX512VL-LABEL: test_srem_int_min:
; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX512VL-NEXT: vpsrad $31, %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
- %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
+ %srem = srem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-; If all divisors are ones, this is constant-folded.
-define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
-; CHECK-SSE-LABEL: test_srem_one_eq:
+; We could lower remainder of division by all-ones much better elsewhere.
+define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_srem_allones:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE-NEXT: retq
;
-; CHECK-AVX1-LABEL: test_srem_one_eq:
+; CHECK-AVX1-LABEL: test_srem_allones:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX1-NEXT: retq
;
-; CHECK-AVX2-LABEL: test_srem_one_eq:
+; CHECK-AVX2-LABEL: test_srem_allones:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX2-NEXT: retq
;
-; CHECK-AVX512VL-LABEL: test_srem_one_eq:
+; CHECK-AVX512VL-LABEL: test_srem_allones:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX512VL-NEXT: retq
- %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
-; CHECK-SSE-LABEL: test_srem_one_ne:
-; CHECK-SSE: # %bb.0:
-; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
-; CHECK-SSE-NEXT: retq
-;
-; CHECK-AVX-LABEL: test_srem_one_ne:
-; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-AVX-NEXT: retq
- %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
- %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
- %ret = zext <4 x i1> %cmp to <4 x i32>
- ret <4 x i32> %ret
-}
Modified: llvm/trunk/test/CodeGen/X86/srem-seteq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/srem-seteq.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/srem-seteq.ll (original)
+++ llvm/trunk/test/CodeGen/X86/srem-seteq.ll Tue Jul 30 01:00:49 2019
@@ -359,11 +359,86 @@ define i32 @test_srem_odd_setne(i32 %X)
ret i32 %ret
}
+; The fold is only valid for positive divisors, negative-ones should be negated.
+define i32 @test_srem_negative_odd(i32 %X) nounwind {
+; X86-LABEL: test_srem_negative_odd:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $-1717986919, %edx # imm = 0x99999999
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: imull %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: sarl %edx
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: leal (%edx,%edx,4), %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_srem_negative_odd:
+; X64: # %bb.0:
+; X64-NEXT: movslq %edi, %rcx
+; X64-NEXT: imulq $-1717986919, %rcx, %rax # imm = 0x99999999
+; X64-NEXT: movq %rax, %rdx
+; X64-NEXT: shrq $63, %rdx
+; X64-NEXT: sarq $33, %rax
+; X64-NEXT: addl %edx, %eax
+; X64-NEXT: leal (%rax,%rax,4), %edx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: addl %edx, %ecx
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+ %srem = srem i32 %X, -5
+ %cmp = icmp ne i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+define i32 @test_srem_negative_even(i32 %X) nounwind {
+; X86-LABEL: test_srem_negative_even:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl $1840700269, %edx # imm = 0x6DB6DB6D
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: imull %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: shrl $31, %eax
+; X86-NEXT: sarl $3, %edx
+; X86-NEXT: addl %eax, %edx
+; X86-NEXT: imull $-14, %edx, %edx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl %edx, %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_srem_negative_even:
+; X64: # %bb.0:
+; X64-NEXT: movslq %edi, %rcx
+; X64-NEXT: imulq $1840700269, %rcx, %rax # imm = 0x6DB6DB6D
+; X64-NEXT: shrq $32, %rax
+; X64-NEXT: subl %ecx, %eax
+; X64-NEXT: movl %eax, %edx
+; X64-NEXT: shrl $31, %edx
+; X64-NEXT: sarl $3, %eax
+; X64-NEXT: addl %edx, %eax
+; X64-NEXT: imull $-14, %eax, %edx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %edx, %ecx
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+ %srem = srem i32 %X, -14
+ %cmp = icmp ne i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;
-; The fold is invalid if divisor is 1.
+; We can lower remainder of division by one much better elsewhere.
define i32 @test_srem_one(i32 %X) nounwind {
; CHECK-LABEL: test_srem_one:
; CHECK: # %bb.0:
@@ -375,18 +450,6 @@ define i32 @test_srem_one(i32 %X) nounwi
ret i32 %ret
}
-; We can lower remainder of division by all-ones much better elsewhere.
-define i32 @test_srem_allones(i32 %X) nounwind {
-; CHECK-LABEL: test_srem_allones:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl $1, %eax
-; CHECK-NEXT: ret{{[l|q]}}
- %srem = srem i32 %X, 4294967295
- %cmp = icmp eq i32 %srem, 0
- %ret = zext i1 %cmp to i32
- ret i32 %ret
-}
-
; We can lower remainder of division by powers of two much better elsewhere.
define i32 @test_srem_pow2(i32 %X) nounwind {
; X86-LABEL: test_srem_pow2:
@@ -417,4 +480,48 @@ define i32 @test_srem_pow2(i32 %X) nounw
%cmp = icmp eq i32 %srem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
+}
+
+; The fold is only valid for positive divisors, and we can't negate INT_MIN.
+define i32 @test_srem_int_min(i32 %X) nounwind {
+; X86-LABEL: test_srem_int_min:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: andl $-2147483648, %edx # imm = 0x80000000
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: sete %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_srem_int_min:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: sarl $31, %ecx
+; X64-NEXT: shrl %ecx
+; X64-NEXT: addl %edi, %ecx
+; X64-NEXT: andl $-2147483648, %ecx # imm = 0x80000000
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: addl %edi, %ecx
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+ %srem = srem i32 %X, 2147483648
+ %cmp = icmp eq i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+; We can lower remainder of division by all-ones much better elsewhere.
+define i32 @test_srem_allones(i32 %X) nounwind {
+; CHECK-LABEL: test_srem_allones:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: ret{{[l|q]}}
+ %srem = srem i32 %X, 4294967295
+ %cmp = icmp eq i32 %srem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
}
Modified: llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/urem-seteq-vec-nonsplat.ll Tue Jul 30 01:00:49 2019
@@ -5,8 +5,6 @@
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL
-; At the moment, BuildUREMEqFold does not handle nonsplat vectors.
-
; Odd+Even divisors
define <4 x i32> @test_urem_odd_even(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_odd_even:
@@ -1213,6 +1211,310 @@ define <4 x i32> @test_urem_odd_even_one
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
+}
+
+;------------------------------------------------------------------------------;
+
+; One INT_MIN divisor in odd divisor
+define <4 x i32> @test_urem_odd_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_urem_odd_INT_MIN:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = <3435973837,u,2,u>
+; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[3,0]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,2]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_urem_odd_INT_MIN:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <3435973837,u,2,u>
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: psrld $2, %xmm1
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_urem_odd_INT_MIN:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_urem_odd_INT_MIN:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm0, %xmm2
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_urem_odd_INT_MIN:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One INT_MIN divisor in even divisor
+define <4 x i32> @test_urem_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_urem_even_INT_MIN:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
+; CHECK-SSE2-NEXT: psrld $1, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm1[3,0]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[2,0],xmm2[3,0]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm1[0,2]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_urem_even_INT_MIN:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
+; CHECK-SSE41-NEXT: psrld $1, %xmm1
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-SSE41-NEXT: psrld $2, %xmm2
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_urem_even_INT_MIN:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm0[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_urem_even_INT_MIN:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
+; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
+; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_urem_even_INT_MIN:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; One INT_MIN divisor in odd+even divisor
+define <4 x i32> @test_urem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_urem_odd_even_INT_MIN:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,2454267027,2,1374389535]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
+; CHECK-SSE2-NEXT: psrld $1, %xmm3
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[3,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $5, %xmm1
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
+; CHECK-SSE2-NEXT: psrld $2, %xmm3
+; CHECK-SSE2-NEXT: movsd {{.*#+}} xmm2 = xmm3[0],xmm2[1]
+; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [5,14,2147483648,100]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm1[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm4
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_urem_odd_even_INT_MIN:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
+; CHECK-SSE41-NEXT: psrld $1, %xmm1
+; CHECK-SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,2454267027,2,1374389535]
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuludq %xmm1, %xmm3
+; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
+; CHECK-SSE41-NEXT: psrld $5, %xmm2
+; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3
+; CHECK-SSE41-NEXT: psrld $2, %xmm3
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm3
+; CHECK-SSE41-NEXT: psubd %xmm3, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_urem_odd_even_INT_MIN:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
+; CHECK-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
+; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,2454267027,2,1374389535]
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm3
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_urem_odd_even_INT_MIN:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,2454267027,2,1374389535]
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
+; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_urem_odd_even_INT_MIN:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
}
;==============================================================================;
Modified: llvm/trunk/test/CodeGen/X86/urem-seteq-vec-splat.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/urem-seteq-vec-splat.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/urem-seteq-vec-splat.ll (original)
+++ llvm/trunk/test/CodeGen/X86/urem-seteq-vec-splat.ll Tue Jul 30 01:00:49 2019
@@ -150,6 +150,144 @@ define <4 x i32> @test_urem_even_100(<4
ret <4 x i32> %ret
}
+; Negative divisors should be negated, and thus this is still splat vectors.
+
+; Odd divisor
+define <4 x i32> @test_urem_odd_neg25(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_urem_odd_neg25:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,1030792151,1030792151,3264175145]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_urem_odd_neg25:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
+; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798691,1,1,171798691]
+; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX-LABEL: test_urem_odd_neg25:
+; CHECK-AVX: # %bb.0:
+; CHECK-AVX-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; Even divisors
+define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
+; CHECK-SSE2-LABEL: test_urem_even_neg100:
+; CHECK-SSE2: # %bb.0:
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
+; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE2-NEXT: psrld $2, %xmm2
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE2-NEXT: psrld $27, %xmm1
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; CHECK-SSE2-NEXT: psrld $5, %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
+; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE2-NEXT: psrld $31, %xmm0
+; CHECK-SSE2-NEXT: retq
+;
+; CHECK-SSE41-LABEL: test_urem_even_neg100:
+; CHECK-SSE41: # %bb.0:
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm1
+; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2
+; CHECK-SSE41-NEXT: psrld $2, %xmm2
+; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
+; CHECK-SSE41-NEXT: psrld $5, %xmm1
+; CHECK-SSE41-NEXT: psrld $27, %xmm2
+; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
+; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
+; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE41-NEXT: psrld $31, %xmm0
+; CHECK-SSE41-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_urem_even_neg100:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm2
+; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2
+; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
+; CHECK-AVX1-NEXT: vpsrld $5, %xmm1, %xmm2
+; CHECK-AVX1-NEXT: vpsrld $27, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
+; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_urem_even_neg100:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [1374389535,1374389535,1374389535,1374389535]
+; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [536870925,536870925,536870925,536870925]
+; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
+; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_urem_even_neg100:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
;------------------------------------------------------------------------------;
; Comparison constant has undef elements.
;------------------------------------------------------------------------------;
@@ -348,6 +486,47 @@ define <4 x i32> @test_urem_even_undef1(
; Negative tests
;------------------------------------------------------------------------------;
+define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_urem_one_eq:
+; CHECK-SSE: # %bb.0:
+; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_urem_one_eq:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_urem_one_eq:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_urem_one_eq:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_urem_one_ne:
+; CHECK-SSE: # %bb.0:
+; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX-LABEL: test_urem_one_ne:
+; CHECK-AVX: # %bb.0:
+; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-AVX-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
; We can lower remainder of division by powers of two much better elsewhere.
define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind {
; CHECK-SSE-LABEL: test_urem_pow2:
@@ -388,6 +567,46 @@ define <4 x i32> @test_urem_pow2(<4 x i3
ret <4 x i32> %ret
}
+; We could lower remainder of division by INT_MIN much better elsewhere.
+define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind {
+; CHECK-SSE-LABEL: test_urem_int_min:
+; CHECK-SSE: # %bb.0:
+; CHECK-SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
+; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; CHECK-SSE-NEXT: psrld $31, %xmm0
+; CHECK-SSE-NEXT: retq
+;
+; CHECK-AVX1-LABEL: test_urem_int_min:
+; CHECK-AVX1: # %bb.0:
+; CHECK-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: retq
+;
+; CHECK-AVX2-LABEL: test_urem_int_min:
+; CHECK-AVX2: # %bb.0:
+; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483647,2147483647,2147483647,2147483647]
+; CHECK-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX2-NEXT: retq
+;
+; CHECK-AVX512VL-LABEL: test_urem_int_min:
+; CHECK-AVX512VL: # %bb.0:
+; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-AVX512VL-NEXT: retq
+ %urem = urem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
+ %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
+ %ret = zext <4 x i1> %cmp to <4 x i32>
+ ret <4 x i32> %ret
+}
+
; We could lower remainder of division by all-ones much better elsewhere.
define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind {
; CHECK-SSE2-LABEL: test_urem_allones:
@@ -442,45 +661,3 @@ define <4 x i32> @test_urem_allones(<4 x
%ret = zext <4 x i1> %cmp to <4 x i32>
ret <4 x i32> %ret
}
-
-; If all divisors are ones, this is constant-folded.
-define <4 x i32> @test_urem_one_eq(<4 x i32> %X) nounwind {
-; CHECK-SSE-LABEL: test_urem_one_eq:
-; CHECK-SSE: # %bb.0:
-; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-SSE-NEXT: retq
-;
-; CHECK-AVX1-LABEL: test_urem_one_eq:
-; CHECK-AVX1: # %bb.0:
-; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-AVX1-NEXT: retq
-;
-; CHECK-AVX2-LABEL: test_urem_one_eq:
-; CHECK-AVX2: # %bb.0:
-; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-AVX2-NEXT: retq
-;
-; CHECK-AVX512VL-LABEL: test_urem_one_eq:
-; CHECK-AVX512VL: # %bb.0:
-; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-AVX512VL-NEXT: retq
- %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
- %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
- %ret = zext <4 x i1> %cmp to <4 x i32>
- ret <4 x i32> %ret
-}
-define <4 x i32> @test_urem_one_ne(<4 x i32> %X) nounwind {
-; CHECK-SSE-LABEL: test_urem_one_ne:
-; CHECK-SSE: # %bb.0:
-; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
-; CHECK-SSE-NEXT: retq
-;
-; CHECK-AVX-LABEL: test_urem_one_ne:
-; CHECK-AVX: # %bb.0:
-; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; CHECK-AVX-NEXT: retq
- %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
- %cmp = icmp ne <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
- %ret = zext <4 x i1> %cmp to <4 x i32>
- ret <4 x i32> %ret
-}
Modified: llvm/trunk/test/CodeGen/X86/urem-seteq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/urem-seteq.ll?rev=367294&r1=367293&r2=367294&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/urem-seteq.ll (original)
+++ llvm/trunk/test/CodeGen/X86/urem-seteq.ll Tue Jul 30 01:00:49 2019
@@ -6,10 +6,6 @@
; Odd divisors
;------------------------------------------------------------------------------;
-; This tests the BuildREMEqFold optimization with UREM, i32, odd divisor, SETEQ.
-; The corresponding pseudocode is:
-; Q <- [N * multInv(5, 2^32)] <=> [N * 0xCCCCCCCD] <=> [N * (-858993459)]
-; res <- [Q <= (2^32 - 1) / 5] <=> [Q <= 858993459] <=> [Q < 858993460]
define i32 @test_urem_odd(i32 %X) nounwind {
; X86-LABEL: test_urem_odd:
; X86: # %bb.0:
@@ -104,12 +100,6 @@ define i32 @test_urem_odd_bit31(i32 %X)
; Even divisors
;------------------------------------------------------------------------------;
-; This tests the BuildREMEqFold optimization with UREM, i16, even divisor, SETNE.
-; In this case, D <=> 14 <=> 7 * 2^1, so D0 = 7 and K = 1.
-; The corresponding pseudocode is:
-; Q <- [N * multInv(D0, 2^16)] <=> [N * multInv(7, 2^16)] <=> [N * 28087]
-; Q <- [Q >>rot K] <=> [Q >>rot 1]
-; res <- ![Q <= (2^16 - 1) / 7] <=> ![Q <= 9362] <=> [Q > 9362]
define i16 @test_urem_even(i16 %X) nounwind {
; X86-LABEL: test_urem_even:
; X86: # %bb.0:
@@ -239,11 +229,57 @@ define i32 @test_urem_odd_setne(i32 %X)
ret i32 %ret
}
+; The fold is only valid for positive divisors, negative-ones should be negated.
+define i32 @test_urem_negative_odd(i32 %X) nounwind {
+; X86-LABEL: test_urem_negative_odd:
+; X86: # %bb.0:
+; X86-NEXT: imull $858993459, {{[0-9]+}}(%esp), %ecx # imm = 0x33333333
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: seta %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_urem_negative_odd:
+; X64: # %bb.0:
+; X64-NEXT: imull $858993459, %edi, %ecx # imm = 0x33333333
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $1, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, -5
+ %cmp = icmp ne i32 %urem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+define i32 @test_urem_negative_even(i32 %X) nounwind {
+; X86-LABEL: test_urem_negative_even:
+; X86: # %bb.0:
+; X86-NEXT: imull $-920350135, {{[0-9]+}}(%esp), %ecx # imm = 0xC9249249
+; X86-NEXT: rorl %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl $1, %ecx
+; X86-NEXT: seta %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_urem_negative_even:
+; X64: # %bb.0:
+; X64-NEXT: imull $-920350135, %edi, %ecx # imm = 0xC9249249
+; X64-NEXT: rorl %ecx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $1, %ecx
+; X64-NEXT: seta %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, -14
+ %cmp = icmp ne i32 %urem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
;------------------------------------------------------------------------------;
; Negative tests
;------------------------------------------------------------------------------;
-; The fold is invalid if divisor is 1.
+; We can lower remainder of division by one much better elsewhere.
define i32 @test_urem_one(i32 %X) nounwind {
; CHECK-LABEL: test_urem_one:
; CHECK: # %bb.0:
@@ -255,46 +291,67 @@ define i32 @test_urem_one(i32 %X) nounwi
ret i32 %ret
}
-; We can lower remainder of division by all-ones much better elsewhere.
-define i32 @test_urem_allones(i32 %X) nounwind {
-; X86-LABEL: test_urem_allones:
+; We can lower remainder of division by powers of two much better elsewhere.
+define i32 @test_urem_pow2(i32 %X) nounwind {
+; X86-LABEL: test_urem_pow2:
; X86: # %bb.0:
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: cmpl $2, %ecx
-; X86-NEXT: setb %al
+; X86-NEXT: testb $15, {{[0-9]+}}(%esp)
+; X86-NEXT: sete %al
; X86-NEXT: retl
;
-; X64-LABEL: test_urem_allones:
+; X64-LABEL: test_urem_pow2:
; X64: # %bb.0:
-; X64-NEXT: negl %edi
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: cmpl $2, %edi
-; X64-NEXT: setb %al
+; X64-NEXT: testb $15, %dil
+; X64-NEXT: sete %al
; X64-NEXT: retq
- %urem = urem i32 %X, 4294967295
+ %urem = urem i32 %X, 16
%cmp = icmp eq i32 %urem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
}
-; We can lower remainder of division by powers of two much better elsewhere.
-define i32 @test_urem_pow2(i32 %X) nounwind {
-; X86-LABEL: test_urem_pow2:
+; The fold is only valid for positive divisors, and we can't negate INT_MIN.
+define i32 @test_urem_int_min(i32 %X) nounwind {
+; X86-LABEL: test_urem_int_min:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: testb $15, {{[0-9]+}}(%esp)
+; X86-NEXT: testl $2147483647, {{[0-9]+}}(%esp) # imm = 0x7FFFFFFF
; X86-NEXT: sete %al
; X86-NEXT: retl
;
-; X64-LABEL: test_urem_pow2:
+; X64-LABEL: test_urem_int_min:
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
-; X64-NEXT: testb $15, %dil
+; X64-NEXT: testl $2147483647, %edi # imm = 0x7FFFFFFF
; X64-NEXT: sete %al
; X64-NEXT: retq
- %urem = urem i32 %X, 16
+ %urem = urem i32 %X, 2147483648
+ %cmp = icmp eq i32 %urem, 0
+ %ret = zext i1 %cmp to i32
+ ret i32 %ret
+}
+
+; We can lower remainder of division by all-ones much better elsewhere.
+define i32 @test_urem_allones(i32 %X) nounwind {
+; X86-LABEL: test_urem_allones:
+; X86: # %bb.0:
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: subl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: cmpl $2, %ecx
+; X86-NEXT: setb %al
+; X86-NEXT: retl
+;
+; X64-LABEL: test_urem_allones:
+; X64: # %bb.0:
+; X64-NEXT: negl %edi
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $2, %edi
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+ %urem = urem i32 %X, 4294967295
%cmp = icmp eq i32 %urem, 0
%ret = zext i1 %cmp to i32
ret i32 %ret
More information about the llvm-commits
mailing list