[llvm] [AArch64][GlobalISel] Select G_ICMP Zero Instruction (PR #90054)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 25 07:00:12 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: chuongg3
<details>
<summary>Changes</summary>
---
Patch is 62.34 KiB, truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/90054.diff
6 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+46)
- (modified) llvm/test/CodeGen/AArch64/aarch64-addv.ll (+12-13)
- (modified) llvm/test/CodeGen/AArch64/arm64-vabs.ll (+105-122)
- (modified) llvm/test/CodeGen/AArch64/icmp.ll (+664)
- (modified) llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll (+11-26)
- (modified) llvm/test/CodeGen/AArch64/neon-compare-instructions.ll (+140-350)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index a7abb58064a535..bf331b97069ba2 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -5397,6 +5397,52 @@ def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
(BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
+multiclass SelectSetCCZeroRHS<PatFrags InFrag, string INST> {
+ def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), immAllZerosV)),
+ (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
+ def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), immAllZerosV)),
+ (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
+ def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), immAllZerosV)),
+ (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
+ def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), immAllZerosV)),
+ (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
+ def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), immAllZerosV)),
+ (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
+ def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), immAllZerosV)),
+ (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
+ def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), immAllZerosV)),
+ (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
+}
+
+defm : SelectSetCCZeroRHS<seteq, "CMEQ">;
+defm : SelectSetCCZeroRHS<setgt, "CMGT">;
+defm : SelectSetCCZeroRHS<setge, "CMGE">;
+defm : SelectSetCCZeroRHS<setlt, "CMLT">;
+defm : SelectSetCCZeroRHS<setle, "CMLE">;
+
+multiclass SelectSetCCZeroLHS<PatFrags InFrag, string INST> {
+ def : Pat<(v8i8 (InFrag immAllZerosV, (v8i8 V64:$Rn))),
+ (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
+ def : Pat<(v16i8 (InFrag immAllZerosV, (v16i8 V128:$Rn))),
+ (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
+ def : Pat<(v4i16 (InFrag immAllZerosV, (v4i16 V64:$Rn))),
+ (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
+ def : Pat<(v8i16 (InFrag immAllZerosV, (v8i16 V128:$Rn))),
+ (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
+ def : Pat<(v2i32 (InFrag immAllZerosV, (v2i32 V64:$Rn))),
+ (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
+ def : Pat<(v4i32 (InFrag immAllZerosV, (v4i32 V128:$Rn))),
+ (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
+ def : Pat<(v2i64 (InFrag immAllZerosV, (v2i64 V128:$Rn))),
+ (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
+}
+
+defm : SelectSetCCZeroLHS<seteq, "CMEQ">;
+defm : SelectSetCCZeroLHS<setgt, "CMLT">;
+defm : SelectSetCCZeroLHS<setge, "CMLE">;
+defm : SelectSetCCZeroLHS<setlt, "CMGT">;
+defm : SelectSetCCZeroLHS<setle, "CMGE">;
+
let Predicates = [HasNEON] in {
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
(ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
index ee035ec1941d57..94b792b887eb47 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -94,20 +94,19 @@ define i32 @oversized_ADDV_256(ptr noalias nocapture readonly %arg1, ptr noalias
;
; GISEL-LABEL: oversized_ADDV_256:
; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: ldr d1, [x0]
-; GISEL-NEXT: ldr d2, [x1]
-; GISEL-NEXT: movi v0.2d, #0000000000000000
+; GISEL-NEXT: ldr d0, [x0]
+; GISEL-NEXT: ldr d1, [x1]
+; GISEL-NEXT: ushll v0.8h, v0.8b, #0
; GISEL-NEXT: ushll v1.8h, v1.8b, #0
-; GISEL-NEXT: ushll v2.8h, v2.8b, #0
-; GISEL-NEXT: usubl v3.4s, v1.4h, v2.4h
-; GISEL-NEXT: usubl2 v1.4s, v1.8h, v2.8h
-; GISEL-NEXT: cmgt v2.4s, v0.4s, v3.4s
-; GISEL-NEXT: cmgt v0.4s, v0.4s, v1.4s
-; GISEL-NEXT: neg v4.4s, v3.4s
-; GISEL-NEXT: neg v5.4s, v1.4s
-; GISEL-NEXT: bsl v2.16b, v4.16b, v3.16b
-; GISEL-NEXT: bsl v0.16b, v5.16b, v1.16b
-; GISEL-NEXT: add v0.4s, v2.4s, v0.4s
+; GISEL-NEXT: usubl v2.4s, v0.4h, v1.4h
+; GISEL-NEXT: usubl2 v0.4s, v0.8h, v1.8h
+; GISEL-NEXT: cmlt v1.4s, v2.4s, #0
+; GISEL-NEXT: cmlt v3.4s, v0.4s, #0
+; GISEL-NEXT: neg v4.4s, v2.4s
+; GISEL-NEXT: neg v5.4s, v0.4s
+; GISEL-NEXT: bsl v1.16b, v4.16b, v2.16b
+; GISEL-NEXT: bit v0.16b, v5.16b, v3.16b
+; GISEL-NEXT: add v0.4s, v1.4s, v0.4s
; GISEL-NEXT: addv s0, v0.4s
; GISEL-NEXT: fmov w0, s0
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
index d64327656a9e01..f7d31a214563bc 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll
@@ -252,18 +252,17 @@ define i16 @uabd16b_rdx(ptr %a, ptr %b) {
;
; CHECK-GI-LABEL: uabd16b_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: usubl.8h v3, v1, v2
-; CHECK-GI-NEXT: usubl2.8h v1, v1, v2
-; CHECK-GI-NEXT: cmgt.8h v2, v0, v3
-; CHECK-GI-NEXT: cmgt.8h v0, v0, v1
-; CHECK-GI-NEXT: neg.8h v4, v3
-; CHECK-GI-NEXT: neg.8h v5, v1
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
-; CHECK-GI-NEXT: add.8h v0, v2, v0
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: usubl.8h v2, v0, v1
+; CHECK-GI-NEXT: usubl2.8h v0, v0, v1
+; CHECK-GI-NEXT: cmlt.8h v1, v2, #0
+; CHECK-GI-NEXT: cmlt.8h v3, v0, #0
+; CHECK-GI-NEXT: neg.8h v4, v2
+; CHECK-GI-NEXT: neg.8h v5, v0
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
+; CHECK-GI-NEXT: add.8h v0, v1, v0
; CHECK-GI-NEXT: addv.8h h0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -290,29 +289,28 @@ define i32 @uabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: uabd16b_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ushll.8h v3, v0, #0
-; CHECK-GI-NEXT: ushll.8h v4, v1, #0
+; CHECK-GI-NEXT: ushll.8h v2, v0, #0
+; CHECK-GI-NEXT: ushll.8h v3, v1, #0
; CHECK-GI-NEXT: ushll2.8h v0, v0, #0
; CHECK-GI-NEXT: ushll2.8h v1, v1, #0
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: usubl.4s v5, v3, v4
-; CHECK-GI-NEXT: usubl2.4s v3, v3, v4
-; CHECK-GI-NEXT: usubl.4s v4, v0, v1
+; CHECK-GI-NEXT: usubl.4s v4, v2, v3
+; CHECK-GI-NEXT: usubl2.4s v2, v2, v3
+; CHECK-GI-NEXT: usubl.4s v3, v0, v1
; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v5
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v17, v3
-; CHECK-GI-NEXT: neg.4s v18, v4
+; CHECK-GI-NEXT: cmlt.4s v1, v4, #0
+; CHECK-GI-NEXT: cmlt.4s v5, v2, #0
+; CHECK-GI-NEXT: neg.4s v16, v4
+; CHECK-GI-NEXT: cmlt.4s v6, v3, #0
+; CHECK-GI-NEXT: cmlt.4s v7, v0, #0
+; CHECK-GI-NEXT: neg.4s v17, v2
+; CHECK-GI-NEXT: neg.4s v18, v3
; CHECK-GI-NEXT: neg.4s v19, v0
-; CHECK-GI-NEXT: bsl.16b v1, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v17, v6
-; CHECK-GI-NEXT: bit.16b v4, v18, v7
-; CHECK-GI-NEXT: bit.16b v0, v19, v2
-; CHECK-GI-NEXT: add.4s v1, v1, v3
-; CHECK-GI-NEXT: add.4s v0, v4, v0
+; CHECK-GI-NEXT: bsl.16b v1, v16, v4
+; CHECK-GI-NEXT: bit.16b v2, v17, v5
+; CHECK-GI-NEXT: bit.16b v3, v18, v6
+; CHECK-GI-NEXT: bit.16b v0, v19, v7
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: add.4s v0, v3, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -338,29 +336,28 @@ define i32 @sabd16b_rdx_i32(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: sabd16b_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: sshll.8h v3, v0, #0
-; CHECK-GI-NEXT: sshll.8h v4, v1, #0
+; CHECK-GI-NEXT: sshll.8h v2, v0, #0
+; CHECK-GI-NEXT: sshll.8h v3, v1, #0
; CHECK-GI-NEXT: sshll2.8h v0, v0, #0
; CHECK-GI-NEXT: sshll2.8h v1, v1, #0
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.4s v5, v3, v4
-; CHECK-GI-NEXT: ssubl2.4s v3, v3, v4
-; CHECK-GI-NEXT: ssubl.4s v4, v0, v1
+; CHECK-GI-NEXT: ssubl.4s v4, v2, v3
+; CHECK-GI-NEXT: ssubl2.4s v2, v2, v3
+; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v5
-; CHECK-GI-NEXT: cmgt.4s v6, v2, v3
-; CHECK-GI-NEXT: neg.4s v16, v5
-; CHECK-GI-NEXT: cmgt.4s v7, v2, v4
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: neg.4s v17, v3
-; CHECK-GI-NEXT: neg.4s v18, v4
+; CHECK-GI-NEXT: cmlt.4s v1, v4, #0
+; CHECK-GI-NEXT: cmlt.4s v5, v2, #0
+; CHECK-GI-NEXT: neg.4s v16, v4
+; CHECK-GI-NEXT: cmlt.4s v6, v3, #0
+; CHECK-GI-NEXT: cmlt.4s v7, v0, #0
+; CHECK-GI-NEXT: neg.4s v17, v2
+; CHECK-GI-NEXT: neg.4s v18, v3
; CHECK-GI-NEXT: neg.4s v19, v0
-; CHECK-GI-NEXT: bsl.16b v1, v16, v5
-; CHECK-GI-NEXT: bit.16b v3, v17, v6
-; CHECK-GI-NEXT: bit.16b v4, v18, v7
-; CHECK-GI-NEXT: bit.16b v0, v19, v2
-; CHECK-GI-NEXT: add.4s v1, v1, v3
-; CHECK-GI-NEXT: add.4s v0, v4, v0
+; CHECK-GI-NEXT: bsl.16b v1, v16, v4
+; CHECK-GI-NEXT: bit.16b v2, v17, v5
+; CHECK-GI-NEXT: bit.16b v3, v18, v6
+; CHECK-GI-NEXT: bit.16b v0, v19, v7
+; CHECK-GI-NEXT: add.4s v1, v1, v2
+; CHECK-GI-NEXT: add.4s v0, v3, v0
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -391,18 +388,17 @@ define i32 @uabd8h_rdx(ptr %a, ptr %b) {
;
; CHECK-GI-LABEL: uabd8h_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: usubl.4s v3, v1, v2
-; CHECK-GI-NEXT: usubl2.4s v1, v1, v2
-; CHECK-GI-NEXT: cmgt.4s v2, v0, v3
-; CHECK-GI-NEXT: cmgt.4s v0, v0, v1
-; CHECK-GI-NEXT: neg.4s v4, v3
-; CHECK-GI-NEXT: neg.4s v5, v1
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
-; CHECK-GI-NEXT: add.4s v0, v2, v0
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: usubl.4s v2, v0, v1
+; CHECK-GI-NEXT: usubl2.4s v0, v0, v1
+; CHECK-GI-NEXT: cmlt.4s v1, v2, #0
+; CHECK-GI-NEXT: cmlt.4s v3, v0, #0
+; CHECK-GI-NEXT: neg.4s v4, v2
+; CHECK-GI-NEXT: neg.4s v5, v0
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
+; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
; CHECK-GI-NEXT: ret
@@ -428,15 +424,14 @@ define i32 @sabd8h_rdx(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-GI-LABEL: sabd8h_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.4s v3, v0, v1
+; CHECK-GI-NEXT: ssubl.4s v2, v0, v1
; CHECK-GI-NEXT: ssubl2.4s v0, v0, v1
-; CHECK-GI-NEXT: neg.4s v4, v3
+; CHECK-GI-NEXT: cmlt.4s v1, v2, #0
+; CHECK-GI-NEXT: cmlt.4s v3, v0, #0
+; CHECK-GI-NEXT: neg.4s v4, v2
; CHECK-GI-NEXT: neg.4s v5, v0
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v3
-; CHECK-GI-NEXT: cmgt.4s v2, v2, v0
-; CHECK-GI-NEXT: bsl.16b v1, v4, v3
-; CHECK-GI-NEXT: bit.16b v0, v5, v2
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
; CHECK-GI-NEXT: add.4s v0, v1, v0
; CHECK-GI-NEXT: addv.4s s0, v0
; CHECK-GI-NEXT: fmov w0, s0
@@ -461,9 +456,8 @@ define i32 @uabdl4s_rdx_i32(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-GI-LABEL: uabdl4s_rdx_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: usubl.4s v0, v0, v1
-; CHECK-GI-NEXT: cmgt.4s v1, v2, v0
+; CHECK-GI-NEXT: cmlt.4s v1, v0, #0
; CHECK-GI-NEXT: neg.4s v2, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: addv.4s s0, v0
@@ -494,18 +488,17 @@ define i64 @uabd4s_rdx(ptr %a, ptr %b, i32 %h) {
;
; CHECK-GI-LABEL: uabd4s_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: ldr q1, [x0]
-; CHECK-GI-NEXT: ldr q2, [x1]
-; CHECK-GI-NEXT: movi.2d v0, #0000000000000000
-; CHECK-GI-NEXT: usubl.2d v3, v1, v2
-; CHECK-GI-NEXT: usubl2.2d v1, v1, v2
-; CHECK-GI-NEXT: cmgt.2d v2, v0, v3
-; CHECK-GI-NEXT: cmgt.2d v0, v0, v1
-; CHECK-GI-NEXT: neg.2d v4, v3
-; CHECK-GI-NEXT: neg.2d v5, v1
-; CHECK-GI-NEXT: bsl.16b v2, v4, v3
-; CHECK-GI-NEXT: bsl.16b v0, v5, v1
-; CHECK-GI-NEXT: add.2d v0, v2, v0
+; CHECK-GI-NEXT: ldr q0, [x0]
+; CHECK-GI-NEXT: ldr q1, [x1]
+; CHECK-GI-NEXT: usubl.2d v2, v0, v1
+; CHECK-GI-NEXT: usubl2.2d v0, v0, v1
+; CHECK-GI-NEXT: cmlt.2d v1, v2, #0
+; CHECK-GI-NEXT: cmlt.2d v3, v0, #0
+; CHECK-GI-NEXT: neg.2d v4, v2
+; CHECK-GI-NEXT: neg.2d v5, v0
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
+; CHECK-GI-NEXT: add.2d v0, v1, v0
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
; CHECK-GI-NEXT: ret
@@ -531,15 +524,14 @@ define i64 @sabd4s_rdx(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-GI-LABEL: sabd4s_rdx:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
-; CHECK-GI-NEXT: ssubl.2d v3, v0, v1
+; CHECK-GI-NEXT: ssubl.2d v2, v0, v1
; CHECK-GI-NEXT: ssubl2.2d v0, v0, v1
-; CHECK-GI-NEXT: neg.2d v4, v3
+; CHECK-GI-NEXT: cmlt.2d v1, v2, #0
+; CHECK-GI-NEXT: cmlt.2d v3, v0, #0
+; CHECK-GI-NEXT: neg.2d v4, v2
; CHECK-GI-NEXT: neg.2d v5, v0
-; CHECK-GI-NEXT: cmgt.2d v1, v2, v3
-; CHECK-GI-NEXT: cmgt.2d v2, v2, v0
-; CHECK-GI-NEXT: bsl.16b v1, v4, v3
-; CHECK-GI-NEXT: bit.16b v0, v5, v2
+; CHECK-GI-NEXT: bsl.16b v1, v4, v2
+; CHECK-GI-NEXT: bit.16b v0, v5, v3
; CHECK-GI-NEXT: add.2d v0, v1, v0
; CHECK-GI-NEXT: addp.2d d0, v0
; CHECK-GI-NEXT: fmov x0, d0
@@ -564,9 +556,8 @@ define i64 @uabdl2d_rdx_i64(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: uabdl2d_rdx_i64:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: usubl.2d v0, v0, v1
-; CHECK-GI-NEXT: cmgt.2d v1, v2, v0
+; CHECK-GI-NEXT: cmlt.2d v1, v0, #0
; CHECK-GI-NEXT: neg.2d v2, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: addp.2d d0, v0
@@ -1662,10 +1653,9 @@ define <2 x i32> @abspattern1(<2 x i32> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern1:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.2s v2, v0
-; CHECK-GI-NEXT: cmge.2s v1, v0, v1
-; CHECK-GI-NEXT: bif.8b v0, v2, v1
+; CHECK-GI-NEXT: neg.2s v1, v0
+; CHECK-GI-NEXT: cmge.2s v2, v0, #0
+; CHECK-GI-NEXT: bif.8b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <2 x i32> zeroinitializer, %a
%b = icmp sge <2 x i32> %a, zeroinitializer
@@ -1682,10 +1672,9 @@ define <4 x i16> @abspattern2(<4 x i16> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern2:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.4h v2, v0
-; CHECK-GI-NEXT: cmgt.4h v1, v0, v1
-; CHECK-GI-NEXT: bif.8b v0, v2, v1
+; CHECK-GI-NEXT: neg.4h v1, v0
+; CHECK-GI-NEXT: cmgt.4h v2, v0, #0
+; CHECK-GI-NEXT: bif.8b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <4 x i16> zeroinitializer, %a
%b = icmp sgt <4 x i16> %a, zeroinitializer
@@ -1701,10 +1690,9 @@ define <8 x i8> @abspattern3(<8 x i8> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern3:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.8b v2, v0
-; CHECK-GI-NEXT: cmgt.8b v1, v1, v0
-; CHECK-GI-NEXT: bit.8b v0, v2, v1
+; CHECK-GI-NEXT: neg.8b v1, v0
+; CHECK-GI-NEXT: cmlt.8b v2, v0, #0
+; CHECK-GI-NEXT: bit.8b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <8 x i8> zeroinitializer, %a
%b = icmp slt <8 x i8> %a, zeroinitializer
@@ -1720,10 +1708,9 @@ define <4 x i32> @abspattern4(<4 x i32> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern4:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.4s v2, v0
-; CHECK-GI-NEXT: cmge.4s v1, v0, v1
-; CHECK-GI-NEXT: bif.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.4s v1, v0
+; CHECK-GI-NEXT: cmge.4s v2, v0, #0
+; CHECK-GI-NEXT: bif.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <4 x i32> zeroinitializer, %a
%b = icmp sge <4 x i32> %a, zeroinitializer
@@ -1739,10 +1726,9 @@ define <8 x i16> @abspattern5(<8 x i16> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern5:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.8h v2, v0
-; CHECK-GI-NEXT: cmgt.8h v1, v0, v1
-; CHECK-GI-NEXT: bif.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.8h v1, v0
+; CHECK-GI-NEXT: cmgt.8h v2, v0, #0
+; CHECK-GI-NEXT: bif.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <8 x i16> zeroinitializer, %a
%b = icmp sgt <8 x i16> %a, zeroinitializer
@@ -1758,10 +1744,9 @@ define <16 x i8> @abspattern6(<16 x i8> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern6:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.16b v2, v0
-; CHECK-GI-NEXT: cmgt.16b v1, v1, v0
-; CHECK-GI-NEXT: bit.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.16b v1, v0
+; CHECK-GI-NEXT: cmlt.16b v2, v0, #0
+; CHECK-GI-NEXT: bit.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <16 x i8> zeroinitializer, %a
%b = icmp slt <16 x i8> %a, zeroinitializer
@@ -1777,10 +1762,9 @@ define <2 x i64> @abspattern7(<2 x i64> %a) nounwind {
;
; CHECK-GI-LABEL: abspattern7:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v1, #0000000000000000
-; CHECK-GI-NEXT: neg.2d v2, v0
-; CHECK-GI-NEXT: cmge.2d v1, v1, v0
-; CHECK-GI-NEXT: bit.16b v0, v2, v1
+; CHECK-GI-NEXT: neg.2d v1, v0
+; CHECK-GI-NEXT: cmle.2d v2, v0, #0
+; CHECK-GI-NEXT: bit.16b v0, v1, v2
; CHECK-GI-NEXT: ret
%tmp1neg = sub <2 x i64> zeroinitializer, %a
%b = icmp sle <2 x i64> %a, zeroinitializer
@@ -1796,9 +1780,8 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: uabd_i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: movi.2d v2, #0000000000000000
; CHECK-GI-NEXT: ssubl.2d v0, v0, v1
-; CHECK-GI-NEXT: cmgt.2d v1, v2, v0
+; CHECK-GI-NEXT: cmlt.2d v1, v0, #0
; CHECK-GI-NEXT: neg.2d v2, v0
; CHECK-GI-NEXT: bit.16b v0, v2, v1
; CHECK-GI-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/icmp.ll b/llvm/test/CodeGen/AArch64/icmp.ll
index 8e10847e7aae34..06e69572bc5779 100644
--- a/llvm/test/CodeGen/AArch64/icmp.ll
+++ b/llvm/test/CodeGen/AArch64/icmp.ll
@@ -319,3 +319,667 @@ entry:
%s = select <32 x i1> %c, <32 x i8> %d, <32 x i8> %e
ret <32 x i8> %s
}
+
+; ===== ICMP Zero RHS =====
+
+define <8 x i1> @icmp_eq_v8i8_Zero_RHS(<8 x i8> %a) {
+; CHECK-LABEL: icmp_eq_v8i8_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.8b, v0.8b, #0
+; CHECK-NEXT: ret
+ %c = icmp eq <8 x i8> %a, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ ret <8 x i1> %c
+}
+
+define <16 x i1> @icmp_eq_v16i8_Zero_RHS(<16 x i8> %a) {
+; CHECK-LABEL: icmp_eq_v16i8_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.16b, v0.16b, #0
+; CHECK-NEXT: ret
+ %c = icmp eq <16 x i8> %a, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
+ ret <16 x i1> %c
+}
+
+define <4 x i1> @icmp_eq_v4i16_Zero_RHS(<4 x i16> %a) {
+; CHECK-LABEL: icmp_eq_v4i16_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.4h, v0.4h, #0
+; CHECK-NEXT: ret
+ %c = icmp eq <4 x i16> %a, <i16 0, i16 0, i16 0, i16 0>
+ ret <4 x i1> %c
+}
+
+define <8 x i1> @icmp_eq_v8i16_Zero_RHS(<8 x i16> %a) {
+; CHECK-LABEL: icmp_eq_v8i16_Zero_RHS:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmeq v0.8h, v0.8h, #0
+; CHECK-NEXT: xtn v0.8b, v0.8h
+; CHECK-NEXT: ret
+ %c = icmp eq <8 x i16> %a, <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
+ ret <8 x i1> %c
+}
+
+define <2 x i1> @icmp_eq_v2i32_Zero_RHS(<2 x i32> %a) {
+; CHECK-LABEL: icmp_eq_v2i32_Zero_RHS:
+; CHECK: /...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/90054
More information about the llvm-commits mailing list