[llvm] [AArch64] Allow usubo and uaddo to happen for any legal type (PR #162907)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 10 12:54:03 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: AZero13 (AZero13)
Changes
---
Full diff: https://github.com/llvm/llvm-project/pull/162907.diff
9 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+3-3)
- (modified) llvm/test/CodeGen/AArch64/abdu-neg.ll (+1-1)
- (modified) llvm/test/CodeGen/AArch64/arm64-srl-and.ll (+4-6)
- (modified) llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll (+18-12)
- (modified) llvm/test/CodeGen/AArch64/cgp-usubo.ll (+3-5)
- (modified) llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll (+6-6)
- (modified) llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll (+7-9)
- (modified) llvm/test/CodeGen/AArch64/sat-add.ll (+22-22)
- (modified) llvm/test/CodeGen/AArch64/signed-truncation-check.ll (+3-3)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index e472e7d565d9b..ab518be9178ef 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -329,9 +329,9 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool MathUsed) const override {
- // Using overflow ops for overflow checks only should beneficial on
- // AArch64.
- return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
+ if (VT.isVector())
+ return false;
+ return !isOperationExpand(Opcode, VT);
}
Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 269cbf03f32a0..606162ade272b 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -355,7 +355,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, hs
+; CHECK-NEXT: cneg x0, x8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
index b58f6ba96a5b8..53d72bada8754 100644
--- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
@@ -2,20 +2,18 @@
; RUN: llc -mtriple=aarch64-linux-gnu -O3 < %s | FileCheck %s
; This used to miscompile:
-; The 16-bit -1 should not become 32-bit -1 (sub w8, w8, #1).
@g = global i16 0, align 4
define i32 @srl_and() {
; CHECK-LABEL: srl_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:g
-; CHECK-NEXT: mov w9, #50
; CHECK-NEXT: ldr x8, [x8, :got_lo12:g]
; CHECK-NEXT: ldrh w8, [x8]
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: mov w9, #65535
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: and w0, w8, w8, lsr #16
+; CHECK-NEXT: cmp w8, #50
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr @g, align 4
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
index 66fea3535b1ec..908da2ee61abc 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-uinc-udec-wrap.ll
@@ -75,10 +75,11 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: .LBB4_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxrb w8, [x0]
-; CHECK-NEXT: cmp w8, w1, uxtb
; CHECK-NEXT: sub w9, w8, #1
-; CHECK-NEXT: ccmp w8, #0, #4, ls
-; CHECK-NEXT: csel w9, w1, w9, eq
+; CHECK-NEXT: cmp w8, w1, uxtb
+; CHECK-NEXT: and w10, w9, #0xffffff00
+; CHECK-NEXT: ccmp w10, #0, #0, ls
+; CHECK-NEXT: csel w9, w1, w9, ne
; CHECK-NEXT: stlxrb w10, w9, [x0]
; CHECK-NEXT: cbnz w10, .LBB4_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -94,10 +95,11 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: .LBB5_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxrh w8, [x0]
-; CHECK-NEXT: cmp w8, w1, uxth
; CHECK-NEXT: sub w9, w8, #1
-; CHECK-NEXT: ccmp w8, #0, #4, ls
-; CHECK-NEXT: csel w9, w1, w9, eq
+; CHECK-NEXT: cmp w8, w1, uxth
+; CHECK-NEXT: and w10, w9, #0xffff0000
+; CHECK-NEXT: ccmp w10, #0, #0, ls
+; CHECK-NEXT: csel w9, w1, w9, ne
; CHECK-NEXT: stlxrh w10, w9, [x0]
; CHECK-NEXT: cbnz w10, .LBB5_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -113,10 +115,12 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: .LBB6_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxr w8, [x0]
+; CHECK-NEXT: subs w9, w8, #1
+; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp w8, w1
-; CHECK-NEXT: sub w9, w8, #1
-; CHECK-NEXT: ccmp w8, #0, #4, ls
-; CHECK-NEXT: csel w9, w1, w9, eq
+; CHECK-NEXT: csinc w10, w10, wzr, ls
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel w9, w1, w9, ne
; CHECK-NEXT: stlxr w10, w9, [x0]
; CHECK-NEXT: cbnz w10, .LBB6_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
@@ -133,10 +137,12 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-NEXT: .LBB7_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldaxr x0, [x8]
+; CHECK-NEXT: subs x9, x0, #1
+; CHECK-NEXT: cset w10, lo
; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: sub x9, x0, #1
-; CHECK-NEXT: ccmp x0, #0, #4, ls
-; CHECK-NEXT: csel x9, x1, x9, eq
+; CHECK-NEXT: csinc w10, w10, wzr, ls
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csel x9, x1, x9, ne
; CHECK-NEXT: stlxr w10, x9, [x8]
; CHECK-NEXT: cbnz w10, .LBB7_1
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
index d307107fc07ee..e49e8e86561c7 100644
--- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll
+++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
@@ -108,11 +108,9 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) nounwind {
define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) nounwind {
; CHECK-LABEL: usubo_eq_constant1_op1_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: sub w9, w0, #1
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: str w9, [x1]
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: subs w8, w0, #1
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%s = add i32 %x, -1
%ov = icmp eq i32 %x, 0
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 3f4dd116d91f8..7917be5728591 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -192,12 +192,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; CHECK-NEXT: mov w22, #2 ; =0x2
; CHECK-NEXT: LBB3_5: ; %for.cond
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cbz w22, LBB3_8
+; CHECK-NEXT: subs w22, w22, #1
+; CHECK-NEXT: b.lo LBB3_8
; CHECK-NEXT: ; %bb.6: ; %for.body
; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1
-; CHECK-NEXT: sub w22, w22, #1
-; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2]
+; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.eq LBB3_5
; CHECK-NEXT: ; %bb.7: ; %if.then
@@ -238,12 +238,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; OUTLINE-ATOMICS-NEXT: cset w8, eq
; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond
; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: cbz w22, LBB3_4
+; OUTLINE-ATOMICS-NEXT: subs w22, w22, #1
+; OUTLINE-ATOMICS-NEXT: b.lo LBB3_4
; OUTLINE-ATOMICS-NEXT: ; %bb.2: ; %for.body
; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1
-; OUTLINE-ATOMICS-NEXT: sub w22, w22, #1
-; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
; OUTLINE-ATOMICS-NEXT: ldr w10, [x19, w22, sxtw #2]
+; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
; OUTLINE-ATOMICS-NEXT: cmp w9, w10
; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1
; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then
diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
index 1207eaa2612a3..f2c84006910c5 100644
--- a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
+++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
@@ -17,24 +17,22 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
; CHECK-ASM-NEXT: .cfi_remember_state
; CHECK-ASM-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-ASM-NEXT: sxtw x8, w0
+; CHECK-ASM-NEXT: mov w9, #10 // =0xa
; CHECK-ASM-NEXT: stp w1, w0, [sp, #8]
-; CHECK-ASM-NEXT: cmp x8, #10
-; CHECK-ASM-NEXT: b.hi .LBB0_5
+; CHECK-ASM-NEXT: subs x9, x9, x8
+; CHECK-ASM-NEXT: b.lo .LBB0_5
; CHECK-ASM-NEXT: // %bb.1: // %entry
-; CHECK-ASM-NEXT: mov w9, #10 // =0xa
-; CHECK-ASM-NEXT: sub x9, x9, x8
; CHECK-ASM-NEXT: cbz x9, .LBB0_5
; CHECK-ASM-NEXT: // %bb.2:
; CHECK-ASM-NEXT: ldrsw x9, [sp, #8]
+; CHECK-ASM-NEXT: mov w10, #10 // =0xa
+; CHECK-ASM-NEXT: subs x11, x10, x9
; CHECK-ASM-NEXT: adrp x10, .L_MergedGlobals
; CHECK-ASM-NEXT: add x10, x10, :lo12:.L_MergedGlobals
; CHECK-ASM-NEXT: strb wzr, [x10, x8]
-; CHECK-ASM-NEXT: cmp x9, #10
-; CHECK-ASM-NEXT: b.hi .LBB0_6
+; CHECK-ASM-NEXT: b.lo .LBB0_6
; CHECK-ASM-NEXT: // %bb.3:
-; CHECK-ASM-NEXT: mov w8, #10 // =0xa
-; CHECK-ASM-NEXT: sub x8, x8, x9
-; CHECK-ASM-NEXT: cbz x8, .LBB0_6
+; CHECK-ASM-NEXT: cbz x11, .LBB0_6
; CHECK-ASM-NEXT: // %bb.4:
; CHECK-ASM-NEXT: add x8, x10, x9
; CHECK-ASM-NEXT: strb wzr, [x8, #10]
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
index ecd48d6b7c65b..12044ebe20fa1 100644
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -25,9 +25,9 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: add w8, w8, #42
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, #42
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -68,9 +68,9 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #42
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, #42
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -188,9 +188,9 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: add w8, w8, w1, uxtb
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -201,11 +201,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: add w8, w8, w0, uxtb
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w9, wzr, eq
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: add w10, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w10, wzr, ls
; CHECK-NEXT: ret
%noty = xor i8 %y, -1
%a = add i8 %x, %y
@@ -234,9 +234,9 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, w1, uxth
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -247,11 +247,11 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: add w8, w8, w0, uxth
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w9, wzr, eq
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: add w10, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w10, wzr, ls
; CHECK-NEXT: ret
%noty = xor i16 %y, -1
%a = add i16 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index 7c80f9320faec..0720a7f72bd8c 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -313,9 +313,9 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #128
-; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: add w8, w0, #128
+; CHECK-NEXT: tst w8, #0xff80
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1)
``````````
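To make the effect concrete, here is the shape of the `usubo_eq_constant1_op1_i32` case from `llvm/test/CodeGen/AArch64/cgp-usubo.ll` above (a sketch reconstructed from the test IR and the updated CHECK lines in this diff): an `x - 1` whose result is kept, paired with an `x == 0` check, is an unsigned subtract-with-overflow in disguise, and with this patch the pair is matched to a single flag-setting `subs` rather than a separate `cmp`/`cset`.

```llvm
; x - 1 borrows (unsigned) exactly when x == 0, so this pair can be
; formed into @llvm.usub.with.overflow.i32 when i32 usubo has a
; non-Expand lowering, as it does on AArch64.
define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) nounwind {
  %s = add i32 %x, -1
  %ov = icmp eq i32 %x, 0
  store i32 %s, ptr %p
  ret i1 %ov
}
; Per the updated CHECK lines, this now selects to:
;   subs w8, w0, #1   ; result and borrow flag in one instruction
;   cset w0, lo
;   str  w8, [x1]
;   ret
```

Gating the fold on `!isOperationExpand(Opcode, VT)` (and rejecting vector types outright) restricts overflow-op formation to types where `ISD::UADDO`/`ISD::USUBO` have a non-Expand lowering, instead of deferring to the generic heuristic as the old override did.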
https://github.com/llvm/llvm-project/pull/162907