[llvm] c782e34 - [AArch64] Add VSHL knownBits handling.

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 8 13:59:59 PDT 2023


Author: David Green
Date: 2023-08-08T21:59:53+01:00
New Revision: c782e3497d0da8d7ab467e37f3c127a0bb176e0c

URL: https://github.com/llvm/llvm-project/commit/c782e3497d0da8d7ab467e37f3c127a0bb176e0c
DIFF: https://github.com/llvm/llvm-project/commit/c782e3497d0da8d7ab467e37f3c127a0bb176e0c.diff

LOG: [AArch64] Add VSHL knownBits handling.

These can be handled in the same way as other shifts.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
    llvm/test/CodeGen/AArch64/rax1.ll
    llvm/test/CodeGen/AArch64/rotate.ll
    llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
    llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
    llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
    llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
    llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0407252d50d12e..5ed6b53c62adbe 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2107,6 +2107,13 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
     Known = KnownBits::ashr(Known, Known2);
     break;
   }
+  case AArch64ISD::VSHL: {
+    KnownBits Known2;
+    Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
+    Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
+    Known = KnownBits::shl(Known, Known2);
+    break;
+  }
   case AArch64ISD::MOVI: {
     ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(0));
     Known =

diff  --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
index c4481871dec49b..3e097c559032d1 100644
--- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
@@ -94,9 +94,9 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) {
 define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
 ; CHECK-LABEL: rotl_v4i32_rotl_const_shift:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v1.4s, v0.4s, #29
-; CHECK-NEXT:    shl v0.4s, v0.4s, #3
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    shl v1.4s, v0.4s, #3
+; CHECK-NEXT:    usra v1.4s, v0.4s, #29
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
   ret <4 x i32> %f
@@ -185,8 +185,8 @@ define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
 ; CHECK-LABEL: rotr_v4i32_const_shift:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #29
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #3
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    usra v1.4s, v0.4s, #3
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
   ret <4 x i32> %f

diff  --git a/llvm/test/CodeGen/AArch64/rax1.ll b/llvm/test/CodeGen/AArch64/rax1.ll
index bc1a216df70f01..f679e9099d09e2 100644
--- a/llvm/test/CodeGen/AArch64/rax1.ll
+++ b/llvm/test/CodeGen/AArch64/rax1.ll
@@ -10,10 +10,9 @@ define <2 x i64> @rax1(<2 x i64> %x, <2 x i64> %y) {
 ;
 ; NOSHA3-LABEL: rax1:
 ; NOSHA3:       // %bb.0:
-; NOSHA3-NEXT:    ushr v2.2d, v1.2d, #63
-; NOSHA3-NEXT:    add v1.2d, v1.2d, v1.2d
-; NOSHA3-NEXT:    orr v1.16b, v1.16b, v2.16b
-; NOSHA3-NEXT:    eor v0.16b, v0.16b, v1.16b
+; NOSHA3-NEXT:    add v2.2d, v1.2d, v1.2d
+; NOSHA3-NEXT:    usra v2.2d, v1.2d, #63
+; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
 ; NOSHA3-NEXT:    ret
     %a = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %y, <2 x i64> %y, <2 x i64> <i64 1, i64 1>)
     %b = xor <2 x i64> %x, %a

diff  --git a/llvm/test/CodeGen/AArch64/rotate.ll b/llvm/test/CodeGen/AArch64/rotate.ll
index 9d1e1562504c64..8d52b6dd3a795d 100644
--- a/llvm/test/CodeGen/AArch64/rotate.ll
+++ b/llvm/test/CodeGen/AArch64/rotate.ll
@@ -6,10 +6,9 @@
 define <2 x i64> @testcase(ptr %in) {
 ; CHECK-LABEL: testcase:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ushr v1.2d, v0.2d, #8
-; CHECK-NEXT:    shl v0.2d, v0.2d, #56
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ldr q1, [x0]
+; CHECK-NEXT:    shl v0.2d, v1.2d, #56
+; CHECK-NEXT:    usra v0.2d, v1.2d, #8
 ; CHECK-NEXT:    ret
   %1 = load <2 x i64>, ptr %in
   %2 = lshr <2 x i64> %1, <i64 8, i64 8>

diff  --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
index e75d220ed21459..58998b02887134 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
@@ -83,17 +83,16 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
   %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -107,17 +106,16 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhi v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
   %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -298,17 +296,16 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
   %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -552,17 +549,16 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI20_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
   %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
index ce71f09062cd3f..0b06032add8421 100644
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -33,18 +33,17 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w9, #47184 // =0xb850
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movk w9, #1310, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    mov w8, #23592 // =0x5c28
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    movk w8, #655, lsl #16
+; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #30
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #2
-; CHECK-NEXT:    dup v2.4s, w8
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v0.4s, v2.4s, #2
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
   %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
@@ -86,18 +85,17 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w9, #47184 // =0xb850
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movk w9, #1310, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    mov w8, #23592 // =0x5c28
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    movk w8, #655, lsl #16
+; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #30
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #2
-; CHECK-NEXT:    dup v2.4s, w8
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v0.4s, v2.4s, #2
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
   %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
index 45239bcec9dbb7..5dcb9edfd1f34c 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
@@ -273,16 +273,15 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #28087 // =0x6db7
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #31
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v1.4s, v0.4s, #1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>

diff  --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
index c2233754b46084..478a34cf2a2b97 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
@@ -54,11 +54,10 @@ define <4 x i1> @t32_6_part0(<4 x i32> %X) nounwind {
 ; CHECK-NEXT:    mov w8, #43690 // =0xaaaa
 ; CHECK-NEXT:    movk w8, #10922, lsl #16
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    dup v2.4s, w8
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #31
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    usra v1.4s, v0.4s, #1
+; CHECK-NEXT:    dup v0.4s, w8
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>
@@ -70,18 +69,17 @@ define <4 x i1> @t32_6_part1(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: t32_6_part1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    mov w9, #43691 // =0xaaab
-; CHECK-NEXT:    movk w9, #43690, lsl #16
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    adrp x8, .LCPI3_1
-; CHECK-NEXT:    dup v2.4s, w9
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
+; CHECK-NEXT:    movk w8, #43690, lsl #16
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    mul v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    adrp x8, .LCPI3_1
+; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #31
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    usra v1.4s, v0.4s, #1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 6, i32 6, i32 6, i32 6>

diff  --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
index a173a606e426bc..d66e81939a8b8a 100644
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
@@ -28,17 +28,16 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    mov w8, #23593 // =0x5c29
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    mov w8, #23592 // =0x5c28
 ; CHECK-NEXT:    movk w8, #655, lsl #16
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    dup v2.4s, w8
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #30
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #2
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v1.4s, v0.4s, #2
+; CHECK-NEXT:    dup v0.4s, w8
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
@@ -72,16 +71,15 @@ define <4 x i32> @test_urem_even_neg100(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_even_neg100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    adrp x8, .LCPI3_1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #30
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #2
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    usra v1.4s, v0.4s, #2
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
   %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>


        


More information about the llvm-commits mailing list