[llvm] c782e34 - [AArch64] Add VSHL knownBits handling.
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 8 13:59:59 PDT 2023
Author: David Green
Date: 2023-08-08T21:59:53+01:00
New Revision: c782e3497d0da8d7ab467e37f3c127a0bb176e0c
URL: https://github.com/llvm/llvm-project/commit/c782e3497d0da8d7ab467e37f3c127a0bb176e0c
DIFF: https://github.com/llvm/llvm-project/commit/c782e3497d0da8d7ab467e37f3c127a0bb176e0c.diff
LOG: [AArch64] Add VSHL knownBits handling.
These can be handled in the same way as other shifts.
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
llvm/test/CodeGen/AArch64/rax1.ll
llvm/test/CodeGen/AArch64/rotate.ll
llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0407252d50d12e..5ed6b53c62adbe 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2107,6 +2107,13 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
Known = KnownBits::ashr(Known, Known2);
break;
}
+ case AArch64ISD::VSHL: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+ Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+ Known = KnownBits::shl(Known, Known2);
+ break;
+ }
case AArch64ISD::MOVI: {
ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(0));
Known =
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
index c4481871dec49b..3e097c559032d1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
@@ -94,9 +94,9 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotl_v4i32_rotl_const_shift:
; CHECK: // %bb.0:
-; CHECK-NEXT: ushr v1.4s, v0.4s, #29
-; CHECK-NEXT: shl v0.4s, v0.4s, #3
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: shl v1.4s, v0.4s, #3
+; CHECK-NEXT: usra v1.4s, v0.4s, #29
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret <4 x i32> %f
@@ -185,8 +185,8 @@ define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
; CHECK-LABEL: rotr_v4i32_const_shift:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v1.4s, v0.4s, #29
-; CHECK-NEXT: ushr v0.4s, v0.4s, #3
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: usra v1.4s, v0.4s, #3
+; CHECK-NEXT: mov v0.16b, v1.16b
; CHECK-NEXT: ret
%f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
ret <4 x i32> %f
diff --git a/llvm/test/CodeGen/AArch64/rax1.ll b/llvm/test/CodeGen/AArch64/rax1.ll
index bc1a216df70f01..f679e9099d09e2 100644
--- a/llvm/test/CodeGen/AArch64/rax1.ll
+++ b/llvm/test/CodeGen/AArch64/rax1.ll
@@ -10,10 +10,9 @@ define <2 x i64> @rax1(<2 x i64> %x, <2 x i64> %y) {
;
; NOSHA3-LABEL: rax1:
; NOSHA3: // %bb.0:
-; NOSHA3-NEXT: ushr v2.2d, v1.2d, #63
-; NOSHA3-NEXT: add v1.2d, v1.2d, v1.2d
-; NOSHA3-NEXT: orr v1.16b, v1.16b, v2.16b
-; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b
+; NOSHA3-NEXT: add v2.2d, v1.2d, v1.2d
+; NOSHA3-NEXT: usra v2.2d, v1.2d, #63
+; NOSHA3-NEXT: eor v0.16b, v0.16b, v2.16b
; NOSHA3-NEXT: ret
%a = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %y, <2 x i64> %y, <2 x i64> <i64 1, i64 1>)
%b = xor <2 x i64> %x, %a
diff --git a/llvm/test/CodeGen/AArch64/rotate.ll b/llvm/test/CodeGen/AArch64/rotate.ll
index 9d1e1562504c64..8d52b6dd3a795d 100644
--- a/llvm/test/CodeGen/AArch64/rotate.ll
+++ b/llvm/test/CodeGen/AArch64/rotate.ll
@@ -6,10 +6,9 @@
define <2 x i64> @testcase(ptr %in) {
; CHECK-LABEL: testcase:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ushr v1.2d, v0.2d, #8
-; CHECK-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: shl v0.2d, v1.2d, #56
+; CHECK-NEXT: usra v0.2d, v1.2d, #8
; CHECK-NEXT: ret
%1 = load <2 x i64>, ptr %in
%2 = lshr <2 x i64> %1, <i64 8, i64 8>
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
index e75d220ed21459..58998b02887134 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
@@ -83,17 +83,16 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w9, #9362 // =0x2492
; CHECK-NEXT: movk w8, #46811, lsl #16
; CHECK-NEXT: movk w9, #4681, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: shl v0.4s, v2.4s, #31
-; CHECK-NEXT: ushr v1.4s, v2.4s, #1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v0.4s, v2.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
+; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -107,17 +106,16 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w9, #9362 // =0x2492
; CHECK-NEXT: movk w8, #46811, lsl #16
; CHECK-NEXT: movk w9, #4681, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: shl v0.4s, v2.4s, #31
-; CHECK-NEXT: ushr v1.4s, v2.4s, #1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v0.4s, v2.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
+; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
%cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -298,17 +296,16 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w9, #9362 // =0x2492
; CHECK-NEXT: movk w8, #46811, lsl #16
; CHECK-NEXT: movk w9, #4681, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0]
; CHECK-NEXT: shl v0.4s, v2.4s, #31
-; CHECK-NEXT: ushr v1.4s, v2.4s, #1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v0.4s, v2.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
+; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -552,17 +549,16 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w9, #9362 // =0x2492
; CHECK-NEXT: movk w8, #46811, lsl #16
; CHECK-NEXT: movk w9, #4681, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: adrp x8, .LCPI20_0
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
; CHECK-NEXT: shl v0.4s, v2.4s, #31
-; CHECK-NEXT: ushr v1.4s, v2.4s, #1
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0]
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v0.4s, v2.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
+; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
index ce71f09062cd3f..0b06032add8421 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -33,18 +33,17 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w9, #47184 // =0xb850
; CHECK-NEXT: movk w8, #49807, lsl #16
; CHECK-NEXT: movk w9, #1310, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: mov w8, #23592 // =0x5c28
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: movk w8, #655, lsl #16
+; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: shl v0.4s, v2.4s, #30
-; CHECK-NEXT: ushr v1.4s, v2.4s, #2
-; CHECK-NEXT: dup v2.4s, w8
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v0.4s, v2.4s, #2
+; CHECK-NEXT: movi v2.4s, #1
+; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -86,18 +85,17 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w9, #47184 // =0xb850
; CHECK-NEXT: movk w8, #49807, lsl #16
; CHECK-NEXT: movk w9, #1310, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: dup v2.4s, w9
; CHECK-NEXT: mov w8, #23592 // =0x5c28
; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s
; CHECK-NEXT: movk w8, #655, lsl #16
+; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: shl v0.4s, v2.4s, #30
-; CHECK-NEXT: ushr v1.4s, v2.4s, #2
-; CHECK-NEXT: dup v2.4s, w8
-; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v0.4s, v2.4s, #2
+; CHECK-NEXT: movi v2.4s, #1
+; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%srem = srem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
%cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
index 45239bcec9dbb7..5dcb9edfd1f34c 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
@@ -273,16 +273,15 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #28087 // =0x6db7
; CHECK-NEXT: movk w8, #46811, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: adrp x8, .LCPI11_0
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0]
; CHECK-NEXT: shl v1.4s, v0.4s, #31
-; CHECK-NEXT: ushr v0.4s, v0.4s, #1
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v1.4s, v0.4s, #1
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
index c2233754b46084..478a34cf2a2b97 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
@@ -54,11 +54,10 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
; CHECK-NEXT: mov w8, #43690 // =0xaaaa
; CHECK-NEXT: movk w8, #10922, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: shl v1.4s, v0.4s, #31
-; CHECK-NEXT: ushr v0.4s, v0.4s, #1
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: usra v1.4s, v0.4s, #1
+; CHECK-NEXT: dup v0.4s, w8
+; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
@@ -70,18 +69,17 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
; CHECK-LABEL: t32_6_part1:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: mov w9, #43691 // =0xaaab
-; CHECK-NEXT: movk w9, #43690, lsl #16
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT: adrp x8, .LCPI3_1
-; CHECK-NEXT: dup v2.4s, w9
+; CHECK-NEXT: mov w8, #43691 // =0xaaab
+; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: adrp x8, .LCPI3_1
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT: shl v1.4s, v0.4s, #31
-; CHECK-NEXT: ushr v0.4s, v0.4s, #1
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: usra v1.4s, v0.4s, #1
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
index a173a606e426bc..d66e81939a8b8a 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
@@ -28,17 +28,16 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #23593 // =0x5c29
; CHECK-NEXT: movk w8, #49807, lsl #16
-; CHECK-NEXT: movi v3.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: dup v1.4s, w8
; CHECK-NEXT: mov w8, #23592 // =0x5c28
; CHECK-NEXT: movk w8, #655, lsl #16
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: dup v2.4s, w8
; CHECK-NEXT: shl v1.4s, v0.4s, #30
-; CHECK-NEXT: ushr v0.4s, v0.4s, #2
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v1.4s, v0.4s, #2
+; CHECK-NEXT: dup v0.4s, w8
+; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
@@ -72,16 +71,15 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
; CHECK-LABEL: test_urem_even_neg100:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: movi v3.4s, #1
+; CHECK-NEXT: movi v2.4s, #1
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: adrp x8, .LCPI3_1
; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
; CHECK-NEXT: shl v1.4s, v0.4s, #30
-; CHECK-NEXT: ushr v0.4s, v0.4s, #2
-; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: usra v1.4s, v0.4s, #2
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT: cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
; CHECK-NEXT: ret
%urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
%cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
More information about the llvm-commits
mailing list