[llvm] [AArch64] Eliminate Common Subexpression of CSEL by Reassociation (PR #121350)
Marius Kamp via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 10:31:41 PST 2025
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/121350
>From 6417c988efef63cdb08ddf69e21b820e3913751c Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sat, 21 Dec 2024 07:57:29 +0100
Subject: [PATCH 1/2] [AArch64] Add Tests for CSEL with Common Subexpression
after Reassociation; NFC
---
llvm/test/CodeGen/AArch64/csel-cmp-cse.ll | 799 ++++++++++++++++++++++
1 file changed, 799 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
new file mode 100644
index 00000000000000..3224cf5638a0e8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -0,0 +1,799 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
+
+declare void @use_i1(i1 %x)
+declare void @use_i32(i32 %x)
+
+; Based on the IR generated for the `last` method of the type `slice` in Rust
+define ptr @test_last_elem_from_ptr(ptr noundef readnone %x0, i64 noundef %x1) {
+; CHECK-LABEL: test_last_elem_from_ptr:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add x8, x0, x1
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: sub x8, x8, #1
+; CHECK-NEXT: csel x0, xzr, x8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i64 %x1, 0
+ %add.ptr = getelementptr inbounds nuw i8, ptr %x0, i64 %x1
+ %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 -1
+ %retval.0 = select i1 %cmp, ptr null, ptr %add.ptr1
+ ret ptr %retval.0
+}
+
+define i32 @test_eq0_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_eq7_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq7_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 7
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, 7
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule7_sub7_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ule7_sub7_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 7
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule7_sub8_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ule7_sub8_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule0_sub1_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ule0_sub1_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ultminus2_subminus2_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ultminus2_subminus2_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmn w1, #2
+; CHECK-NEXT: add w8, w8, #2
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, -2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ultminus2_subminus3_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ultminus2_subminus3_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmn w1, #2
+; CHECK-NEXT: add w8, w8, #3
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, -2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -3
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ne0_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ne0_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, w8, wzr, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %ret
+}
+
+define i32 @test_ne7_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ne7_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: csel w0, w8, wzr, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 7
+ %ret = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %ret
+}
+
+define i32 @test_ultminus1_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ultminus1_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmn w1, #1
+; CHECK-NEXT: csinc w0, wzr, w8, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, -1
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ugt7_sub7_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ugt7_sub7_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 7
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ugt7_sub8_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ugt7_sub8_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sle7_sub7_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sle7_sub7_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 7
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sle7_sub8_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sle7_sub8_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_slt8_sub8_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_slt8_sub8_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, 8
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_slt8_sub7_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_slt8_sub7_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, 8
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 7
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sltminus8_subminus8_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sltminus8_subminus8_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmn w1, #8
+; CHECK-NEXT: add w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, -8
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sgtminus8_subminus8_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sgtminus8_subminus8_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmn w1, #8
+; CHECK-NEXT: add w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x1, -8
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sgtminus8_subminus7_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sgtminus8_subminus7_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmn w1, #8
+; CHECK-NEXT: add w8, w8, #7
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x1, -8
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -7
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_eq0_sub_addcomm_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_sub_addcomm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w1, w0
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add i32 %x1, %x0
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_eq0_subcomm_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_subcomm_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = add i32 -1, %add
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_multi_use_sub_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: mov w19, w1
+; CHECK-NEXT: sub w20, w8, #1
+; CHECK-NEXT: mov w0, w20
+; CHECK-NEXT: bl use_i32
+; CHECK-NEXT: cmp w19, #0
+; CHECK-NEXT: csel w0, wzr, w20, eq
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, 1
+ tail call void @use_i32(i32 %sub)
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_eq0_multi_use_cmp_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_multi_use_cmp_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: csel w19, wzr, w8, eq
+; CHECK-NEXT: bl use_i1
+; CHECK-NEXT: mov w0, w19
+; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ tail call void @use_i1(i1 %cmp)
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_eq0_multi_use_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_multi_use_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w20, -16
+; CHECK-NEXT: .cfi_offset w30, -32
+; CHECK-NEXT: add w20, w0, w1
+; CHECK-NEXT: mov w19, w1
+; CHECK-NEXT: mov w0, w20
+; CHECK-NEXT: bl use_i32
+; CHECK-NEXT: sub w8, w20, #1
+; CHECK-NEXT: cmp w19, #0
+; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add nuw i32 %x0, %x1
+ tail call void @use_i32(i32 %add)
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_eq1_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq1_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #1
+; CHECK-NEXT: sub w8, w8, #2
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 1
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ugtsmax_sub_add_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ugtsmax_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: add w8, w9, w8
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, 2147483647
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2147483648
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_eq_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #2
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ne_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ne_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #2
+; CHECK-NEXT: csel w0, w8, wzr, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2
+ %ret = select i1 %cmp, i32 %sub, i32 0
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ult7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ule7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ule7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #6
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 6
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ugt7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ugt7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #6
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 6
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_uge7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_uge7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #6
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_slt7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_slt7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_sle7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sle7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #8
+; CHECK-NEXT: sub w8, w8, #6
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 6
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_sgt7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sgt7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #7
+; CHECK-NEXT: sub w8, w8, #6
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 6
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_sge7_const_mismatch_i32(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sge7_const_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, #6
+; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x1, 7
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 8
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_unrelated_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_unrelated_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w2
+; CHECK-NEXT: cmp w1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, 0
+ %add = add nuw i32 %x0, %x2
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i16 @test_eq0_sub_add_i16(i16 %x0, i16 %x1) {
+; CHECK-LABEL: test_eq0_sub_add_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: tst w1, #0xffff
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i16 %x1, 0
+ %add = add nuw i16 %x0, %x1
+ %sub = sub i16 %add, 1
+ %ret = select i1 %cmp, i16 0, i16 %sub
+ ret i16 %ret
+}
+
+; Negative test
+define i32 @test_ule_unsigned_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ule_unsigned_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, -1
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 0
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ugt_unsigned_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ugt_unsigned_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w0, w0, w1
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, -1
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 0
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_unsigned_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_ult_unsigned_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: add w0, w8, #1
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_uge_unsigned_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_uge_unsigned_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x1, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, -1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_slt_signed_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_slt_signed_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-2147483647 // =0x80000001
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: add w0, w9, w8
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, 2147483648
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2147483647
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_sle_signed_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sle_signed_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x1, 2147483647
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2147483648
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_sgt_signed_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sgt_signed_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: add w0, w9, w8
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x1, 2147483647
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2147483648
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_sge_signed_overflow(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_sge_signed_overflow:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x1, 2147483648
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 2147483647
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_eq0_bitwidth_mismatch(i32 %x0, i32 %x1) {
+; CHECK-LABEL: test_eq0_bitwidth_mismatch:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: tst w1, #0xffff
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %x1t = trunc i32 %x1 to i16
+ %cmp = icmp eq i16 %x1t, 0
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_eq0_bitwidth_mismatch_2(i32 %x0, i64 %x1) {
+; CHECK-LABEL: test_eq0_bitwidth_mismatch_2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp x1, #0
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %x1t = trunc i64 %x1 to i32
+ %cmp = icmp eq i64 %x1, 0
+ %add = add i32 %x0, %x1t
+ %sub = sub i32 %add, 1
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
>From 43f2da4ebe854378ed24e82b03e4fd965d885606 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Mon, 23 Dec 2024 15:16:46 +0100
Subject: [PATCH 2/2] [AArch64] Eliminate Common Subexpression of CSEL by
Reassociation
If we have a CSEL instruction that depends on the flags set by a
(SUBS x c) instruction and the true and/or false expression is
(add (add x y) -c), we can reassociate the latter expression to
(add (SUBS x c) y) and save one instruction.
Proof for the basic transformation: https://alive2.llvm.org/ce/z/-337Pb
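To make the basic transformation concrete, here is a before/after
sketch taken from the test_eq7_sub_add_i32 case in the updated tests
(comments added here for illustration only):

Before:
    add  w8, w0, w1        // w8 = x0 + x1
    cmp  w1, #7            // set flags for x1 == 7
    sub  w8, w8, #7        // w8 = (x0 + x1) - 7
    csel w0, wzr, w8, eq   // select 0 if x1 == 7, else w8

After:
    subs w8, w1, #7        // w8 = x1 - 7, setting flags in one go
    add  w8, w8, w0        // w8 = (x1 - 7) + x0
    csel w0, wzr, w8, eq   // same selection, one instruction fewer

The SUBS result now serves both as the flag-setting comparison and as
part of the reassociated addition.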
We can extend this transformation to slightly different constants. For
example, if we have (add (add x y) -(c-1)) and the comparison x <u c, we
can rewrite the comparison as x <=u c-1, which still allows the compare
instruction to be eliminated. Similarly, we can transform (x == 0) to
(x <u 1).
Proofs for the transformations that alter the constants:
https://alive2.llvm.org/ce/z/3nVqgR
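As an instance of such a constant adjustment, consider the
test_ule7_sub7_add_i32 case below: x <=u 7 is normally lowered as a
comparison against 8 with condition LO, which does not match the
constant 7 used in the subtraction:

Before:
    add  w8, w0, w1
    cmp  w1, #8
    sub  w8, w8, #7
    csel w0, wzr, w8, lo

Rewriting the comparison as x <=u 7 (condition LS) lets the SUBS with
constant 7 serve both purposes:

After:
    subs w8, w1, #7
    add  w8, w8, w0
    csel w0, wzr, w8, ls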
Fixes #119606.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 121 ++++++++++++++
llvm/test/CodeGen/AArch64/csel-cmp-cse.ll | 156 ++++++++----------
2 files changed, 186 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 24e1ebd8421fbf..ef0e58a30ecd73 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24838,6 +24838,122 @@ static SDValue foldCSELOfCSEL(SDNode *Op, SelectionDAG &DAG) {
return DAG.getNode(AArch64ISD::CSEL, DL, VT, L, R, CCValue, Cond);
}
+// Reassociate the true/false expressions of a CSEL instruction to obtain a
+// common subexpression with the comparison instruction. For example, change
+// (CSEL (ADD (ADD x y) -c) f LO (SUBS x c)) to
+// (CSEL (ADD (SUBS x c) y) f LO (SUBS x c)) such that (SUBS x c) is a common
+// subexpression.
+static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
+ SDValue SubsNode = N->getOperand(3);
+ if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
+ return SDValue();
+ auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
+ if (!CmpOpConst)
+ return SDValue();
+
+ SDValue CmpOpOther = SubsNode.getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Get the operand that can be reassociated with the SUBS instruction.
+ auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
+ if (Op.getOpcode() != ISD::ADD)
+ return SDValue();
+ if (Op.getOperand(0).getOpcode() != ISD::ADD ||
+ !Op.getOperand(0).hasOneUse())
+ return SDValue();
+ SDValue X = Op.getOperand(0).getOperand(0);
+ SDValue Y = Op.getOperand(0).getOperand(1);
+ if (X != CmpOpOther)
+ std::swap(X, Y);
+ if (X != CmpOpOther)
+ return SDValue();
+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
+ return SDValue();
+ return Y;
+ };
+
+ // Try the reassociation using the given constant and condition code.
+ auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
+ APInt ExpectedConst = -NewCmpConst;
+ SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
+ SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
+ if (!TReassocOp && !FReassocOp)
+ return SDValue();
+
+ SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
+ DAG.getVTList(VT, MVT_CC), CmpOpOther,
+ DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
+ CmpOpConst->getValueType(0)));
+
+ auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
+ if (!ReassocOp)
+ return N->getOperand(OpNum);
+ SDValue Res = DAG.getNode(ISD::ADD, SDLoc(N->getOperand(OpNum)), VT,
+ NewCmp.getValue(0), ReassocOp);
+ DAG.ReplaceAllUsesWith(N->getOperand(OpNum), Res);
+ return Res;
+ };
+
+ SDValue TValReassoc = Reassociate(TReassocOp, 0);
+ SDValue FValReassoc = Reassociate(FReassocOp, 1);
+ return DAG.getNode(AArch64ISD::CSEL, SDLoc(N), VT, TValReassoc, FValReassoc,
+ DAG.getConstant(NewCC, SDLoc(N->getOperand(2)), MVT_CC),
+ NewCmp.getValue(1));
+ };
+
+ auto CC = static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
+
+ // First, try to eliminate the compare instruction by searching for a
+ // subtraction with the same constant.
+ if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
+ return R;
+
+ if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
+ return SDValue();
+
+ // Next, search for a subtraction with a slightly different constant. By
+ // adjusting the condition code, we can still eliminate the compare
+ // instruction. Adjusting the constant is only valid if it does not result
+ // in signed/unsigned wrap for signed/unsigned comparisons, respectively.
+ // Since such comparisons are trivially true/false, we should not encounter
+ // them here but check for them nevertheless to be on the safe side.
+ auto CheckedFold = [&](bool Check, APInt NewCmpConst,
+ AArch64CC::CondCode NewCC) {
+ return Check ? Fold(NewCmpConst, NewCC) : SDValue();
+ };
+ switch (CC) {
+ case AArch64CC::EQ:
+ case AArch64CC::LS:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
+ CmpOpConst->getAPIntValue() + 1, AArch64CC::LO);
+ case AArch64CC::NE:
+ case AArch64CC::HI:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxValue(),
+ CmpOpConst->getAPIntValue() + 1, AArch64CC::HS);
+ case AArch64CC::LO:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
+ CmpOpConst->getAPIntValue() - 1, AArch64CC::LS);
+ case AArch64CC::HS:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isZero(),
+ CmpOpConst->getAPIntValue() - 1, AArch64CC::HI);
+ case AArch64CC::LT:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
+ CmpOpConst->getAPIntValue() - 1, AArch64CC::LE);
+ case AArch64CC::LE:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
+ CmpOpConst->getAPIntValue() + 1, AArch64CC::LT);
+ case AArch64CC::GT:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isMaxSignedValue(),
+ CmpOpConst->getAPIntValue() + 1, AArch64CC::GE);
+ case AArch64CC::GE:
+ return CheckedFold(!CmpOpConst->getAPIntValue().isMinSignedValue(),
+ CmpOpConst->getAPIntValue() - 1, AArch64CC::GT);
+ default:
+ return SDValue();
+ }
+}
+
// Optimize CSEL instructions
static SDValue performCSELCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
@@ -24849,6 +24965,11 @@ static SDValue performCSELCombine(SDNode *N,
if (SDValue R = foldCSELOfCSEL(N, DAG))
return R;
+ // Try to reassociate the true/false expressions so that we can do CSE with
+ // a SUBS instruction used to perform the comparison.
+ if (SDValue R = reassociateCSELOperandsForCSE(N, DAG))
+ return R;
+
// CSEL 0, cttz(X), eq(X, 0) -> AND cttz bitwidth-1
// CSEL cttz(X), 0, ne(X, 0) -> AND cttz bitwidth-1
if (SDValue Folded = foldCSELofCTTZ(N, DAG))
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index 3224cf5638a0e8..d8904cc6e35e34 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -8,10 +8,9 @@ declare void @use_i32(i32 %x)
define ptr @test_last_elem_from_ptr(ptr noundef readnone %x0, i64 noundef %x1) {
; CHECK-LABEL: test_last_elem_from_ptr:
; CHECK: // %bb.0:
-; CHECK-NEXT: add x8, x0, x1
-; CHECK-NEXT: cmp x1, #0
-; CHECK-NEXT: sub x8, x8, #1
-; CHECK-NEXT: csel x0, xzr, x8, eq
+; CHECK-NEXT: subs x8, x1, #1
+; CHECK-NEXT: add x8, x8, x0
+; CHECK-NEXT: csel x0, xzr, x8, lo
; CHECK-NEXT: ret
%cmp = icmp eq i64 %x1, 0
%add.ptr = getelementptr inbounds nuw i8, ptr %x0, i64 %x1
@@ -23,10 +22,9 @@ define ptr @test_last_elem_from_ptr(ptr noundef readnone %x0, i64 noundef %x1) {
define i32 @test_eq0_sub_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq0_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #0
-; CHECK-NEXT: sub w8, w8, #1
-; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: subs w8, w1, #1
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x1, 0
%add = add nuw i32 %x0, %x1
@@ -38,9 +36,8 @@ define i32 @test_eq0_sub_add_i32(i32 %x0, i32 %x1) {
define i32 @test_eq7_sub_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq7_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #7
-; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: subs w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, eq
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x1, 7
@@ -53,10 +50,9 @@ define i32 @test_eq7_sub_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ule7_sub7_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ule7_sub7_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #8
-; CHECK-NEXT: sub w8, w8, #7
-; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: subs w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ule i32 %x1, 7
%add = add i32 %x0, %x1
@@ -68,9 +64,8 @@ define i32 @test_ule7_sub7_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ule7_sub8_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ule7_sub8_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #8
-; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: subs w8, w1, #8
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ule i32 %x1, 7
@@ -83,10 +78,9 @@ define i32 @test_ule7_sub8_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ule0_sub1_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ule0_sub1_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #0
-; CHECK-NEXT: sub w8, w8, #1
-; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: subs w8, w1, #1
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ule i32 %x1, 0
%add = add i32 %x0, %x1
@@ -98,9 +92,8 @@ define i32 @test_ule0_sub1_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ultminus2_subminus2_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ultminus2_subminus2_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmn w1, #2
-; CHECK-NEXT: add w8, w8, #2
+; CHECK-NEXT: adds w8, w1, #2
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x1, -2
@@ -113,10 +106,9 @@ define i32 @test_ultminus2_subminus2_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ultminus2_subminus3_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ultminus2_subminus3_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmn w1, #2
-; CHECK-NEXT: add w8, w8, #3
-; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: adds w8, w1, #3
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ls
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x1, -2
%add = add i32 %x0, %x1
@@ -128,10 +120,9 @@ define i32 @test_ultminus2_subminus3_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ne0_sub_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ne0_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #0
-; CHECK-NEXT: sub w8, w8, #1
-; CHECK-NEXT: csel w0, w8, wzr, ne
+; CHECK-NEXT: subs w8, w1, #1
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, w8, wzr, hs
; CHECK-NEXT: ret
%cmp = icmp ne i32 %x1, 0
%add = add i32 %x0, %x1
@@ -143,9 +134,8 @@ define i32 @test_ne0_sub_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ne7_sub_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ne7_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #7
-; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: subs w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, w8, wzr, ne
; CHECK-NEXT: ret
%cmp = icmp ne i32 %x1, 7
@@ -158,9 +148,9 @@ define i32 @test_ne7_sub_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ultminus1_sub_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ultminus1_sub_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmn w1, #1
-; CHECK-NEXT: csinc w0, wzr, w8, ne
+; CHECK-NEXT: adds w8, w1, #1
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ne
; CHECK-NEXT: ret
%cmp = icmp ult i32 %x1, -1
%add = add i32 %x0, %x1
@@ -172,9 +162,8 @@ define i32 @test_ultminus1_sub_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ugt7_sub7_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ugt7_sub7_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #7
-; CHECK-NEXT: sub w8, w8, #7
+; CHECK-NEXT: subs w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, hi
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %x1, 7
@@ -187,10 +176,9 @@ define i32 @test_ugt7_sub7_add_i32(i32 %x0, i32 %x1) {
define i32 @test_ugt7_sub8_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_ugt7_sub8_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #7
-; CHECK-NEXT: sub w8, w8, #8
-; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: subs w8, w1, #8
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hs
; CHECK-NEXT: ret
%cmp = icmp ugt i32 %x1, 7
%add = add i32 %x0, %x1
@@ -202,10 +190,9 @@ define i32 @test_ugt7_sub8_add_i32(i32 %x0, i32 %x1) {
define i32 @test_sle7_sub7_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_sle7_sub7_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #8
-; CHECK-NEXT: sub w8, w8, #7
-; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: subs w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, le
; CHECK-NEXT: ret
%cmp = icmp sle i32 %x1, 7
%add = add i32 %x0, %x1
@@ -217,9 +204,8 @@ define i32 @test_sle7_sub7_add_i32(i32 %x0, i32 %x1) {
define i32 @test_sle7_sub8_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_sle7_sub8_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #8
-; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: subs w8, w1, #8
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lt
; CHECK-NEXT: ret
%cmp = icmp sle i32 %x1, 7
@@ -232,9 +218,8 @@ define i32 @test_sle7_sub8_add_i32(i32 %x0, i32 %x1) {
define i32 @test_slt8_sub8_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_slt8_sub8_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #8
-; CHECK-NEXT: sub w8, w8, #8
+; CHECK-NEXT: subs w8, w1, #8
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lt
; CHECK-NEXT: ret
%cmp = icmp slt i32 %x1, 8
@@ -247,10 +232,9 @@ define i32 @test_slt8_sub8_add_i32(i32 %x0, i32 %x1) {
define i32 @test_slt8_sub7_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_slt8_sub7_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #8
-; CHECK-NEXT: sub w8, w8, #7
-; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: subs w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, le
; CHECK-NEXT: ret
%cmp = icmp slt i32 %x1, 8
%add = add i32 %x0, %x1
@@ -262,9 +246,8 @@ define i32 @test_slt8_sub7_add_i32(i32 %x0, i32 %x1) {
define i32 @test_sltminus8_subminus8_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_sltminus8_subminus8_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmn w1, #8
-; CHECK-NEXT: add w8, w8, #8
+; CHECK-NEXT: adds w8, w1, #8
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, lt
; CHECK-NEXT: ret
%cmp = icmp slt i32 %x1, -8
@@ -277,9 +260,8 @@ define i32 @test_sltminus8_subminus8_add_i32(i32 %x0, i32 %x1) {
define i32 @test_sgtminus8_subminus8_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_sgtminus8_subminus8_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmn w1, #8
-; CHECK-NEXT: add w8, w8, #8
+; CHECK-NEXT: adds w8, w1, #8
+; CHECK-NEXT: add w8, w8, w0
; CHECK-NEXT: csel w0, wzr, w8, gt
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %x1, -8
@@ -292,10 +274,9 @@ define i32 @test_sgtminus8_subminus8_add_i32(i32 %x0, i32 %x1) {
define i32 @test_sgtminus8_subminus7_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_sgtminus8_subminus7_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmn w1, #8
-; CHECK-NEXT: add w8, w8, #7
-; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: adds w8, w1, #7
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ge
; CHECK-NEXT: ret
%cmp = icmp sgt i32 %x1, -8
%add = add i32 %x0, %x1
@@ -307,10 +288,9 @@ define i32 @test_sgtminus8_subminus7_add_i32(i32 %x0, i32 %x1) {
define i32 @test_eq0_sub_addcomm_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq0_sub_addcomm_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w1, w0
-; CHECK-NEXT: cmp w1, #0
-; CHECK-NEXT: sub w8, w8, #1
-; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: subs w8, w1, #1
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x1, 0
%add = add i32 %x1, %x0
@@ -322,10 +302,9 @@ define i32 @test_eq0_sub_addcomm_i32(i32 %x0, i32 %x1) {
define i32 @test_eq0_subcomm_add_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq0_subcomm_add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, #0
-; CHECK-NEXT: sub w8, w8, #1
-; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: subs w8, w1, #1
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x1, 0
%add = add i32 %x0, %x1
@@ -337,21 +316,16 @@ define i32 @test_eq0_subcomm_add_i32(i32 %x0, i32 %x1) {
define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq0_multi_use_sub_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
-; CHECK-NEXT: .cfi_offset w20, -16
-; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: mov w19, w1
-; CHECK-NEXT: sub w20, w8, #1
-; CHECK-NEXT: mov w0, w20
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: subs w8, w1, #1
+; CHECK-NEXT: add w0, w8, w0
+; CHECK-NEXT: csel w19, wzr, w0, lo
; CHECK-NEXT: bl use_i32
-; CHECK-NEXT: cmp w19, #0
-; CHECK-NEXT: csel w0, wzr, w20, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: mov w0, w19
+; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
%cmp = icmp eq i32 %x1, 0
%add = add nuw i32 %x0, %x1