[llvm] [ARM] Use correct ABI for atomic functions (PR #128891)
Oliver Stannard via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 26 07:21:01 PST 2025
https://github.com/ostannard updated https://github.com/llvm/llvm-project/pull/128891
>From b2b9dd189fd55775c7711643472d088d48b0df7f Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Tue, 25 Feb 2025 16:11:02 +0000
Subject: [PATCH 1/6] [AArch64] Don't extend arguments to libcalls
In AAPCS64, the high bits of registers used to pass small arguments are
unspecified, and it is the callee's job to sign- or zero-extend them if
needed. This means that we don't need to do this extension in the
caller.
---
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +++
llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll | 32 +++++++++----------
llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll | 32 +++++++++----------
llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll | 32 +++++++++----------
llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll | 32 +++++++++----------
.../AArch64/strictfp_f16_abi_promote.ll | 17 ++++------
6 files changed, 75 insertions(+), 74 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 1987c892ac080..9a009f0eb6980 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1378,6 +1378,10 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldScalarizeBinop(SDValue VecOp) const override {
return VecOp.getOpcode() == ISD::SETCC;
}
+
+ bool shouldExtendTypeInLibCall(EVT Type) const override {
+ return false;
+ }
};
namespace AArch64 {
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
index 21729b9dfd101..b650040617ecd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
@@ -58,13 +58,13 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -72,7 +72,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -146,13 +146,13 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -160,7 +160,7 @@ define half @test_atomicrmw_fadd_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -711,19 +711,19 @@ define <2 x half> @test_atomicrmw_fadd_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w24
; SOFTFP-NOLSE-NEXT: bl __addsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl __addsf3
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
index 9b5e48d2b4217..41c5afe0f64a9 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
@@ -60,13 +60,13 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -74,7 +74,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -148,13 +148,13 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -162,7 +162,7 @@ define half @test_atomicrmw_fmax_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -591,19 +591,19 @@ define <2 x half> @test_atomicrmw_fmax_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w24
; SOFTFP-NOLSE-NEXT: bl fmaxf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl fmaxf
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
index f6c542fe7d407..a01bd182e61e6 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
@@ -60,13 +60,13 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -74,7 +74,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -148,13 +148,13 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -162,7 +162,7 @@ define half @test_atomicrmw_fmin_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -591,19 +591,19 @@ define <2 x half> @test_atomicrmw_fmin_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB6_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w24
; SOFTFP-NOLSE-NEXT: bl fminf
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl fminf
diff --git a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
index 82e0f14e68e26..01beb5c50afdd 100644
--- a/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
+++ b/llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
@@ -58,13 +58,13 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB0_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB0_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -72,7 +72,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align2(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB0_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB0_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB0_3 Depth=2
@@ -146,13 +146,13 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: .LBB1_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB1_3 Depth 2
-; SOFTFP-NOLSE-NEXT: mov w22, w0
-; SOFTFP-NOLSE-NEXT: and w0, w20, #0xffff
-; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w21, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w20
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
-; SOFTFP-NOLSE-NEXT: mov w1, w21
+; SOFTFP-NOLSE-NEXT: mov w22, w0
+; SOFTFP-NOLSE-NEXT: mov w0, w21
+; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
+; SOFTFP-NOLSE-NEXT: mov w1, w22
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w8, w0
@@ -160,7 +160,7 @@ define half @test_atomicrmw_fsub_f16_seq_cst_align4(ptr %ptr, half %value) #0 {
; SOFTFP-NOLSE-NEXT: // Parent Loop BB1_2 Depth=1
; SOFTFP-NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; SOFTFP-NOLSE-NEXT: ldaxrh w0, [x19]
-; SOFTFP-NOLSE-NEXT: cmp w0, w22, uxth
+; SOFTFP-NOLSE-NEXT: cmp w0, w21, uxth
; SOFTFP-NOLSE-NEXT: b.ne .LBB1_1
; SOFTFP-NOLSE-NEXT: // %bb.4: // %cmpxchg.trystore
; SOFTFP-NOLSE-NEXT: // in Loop: Header=BB1_3 Depth=2
@@ -711,19 +711,19 @@ define <2 x half> @test_atomicrmw_fsub_v2f16_seq_cst_align4(ptr %ptr, <2 x half>
; SOFTFP-NOLSE-NEXT: .LBB7_2: // %atomicrmw.start
; SOFTFP-NOLSE-NEXT: // =>This Loop Header: Depth=1
; SOFTFP-NOLSE-NEXT: // Child Loop BB7_3 Depth 2
-; SOFTFP-NOLSE-NEXT: and w0, w19, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w19
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w23, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w23
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w24
; SOFTFP-NOLSE-NEXT: bl __subsf3
; SOFTFP-NOLSE-NEXT: bl __truncsfhf2
; SOFTFP-NOLSE-NEXT: mov w24, w0
-; SOFTFP-NOLSE-NEXT: and w0, w21, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w21
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w25, w0
-; SOFTFP-NOLSE-NEXT: and w0, w22, #0xffff
+; SOFTFP-NOLSE-NEXT: mov w0, w22
; SOFTFP-NOLSE-NEXT: bl __extendhfsf2
; SOFTFP-NOLSE-NEXT: mov w1, w25
; SOFTFP-NOLSE-NEXT: bl __subsf3
diff --git a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
index 63b8a1cee27ae..771963cfd7042 100644
--- a/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
+++ b/llvm/test/CodeGen/AArch64/strictfp_f16_abi_promote.ll
@@ -20,7 +20,6 @@ define void @f16_arg(half %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_def_cfa_offset 16
; NOFP16-NEXT: .cfi_offset w19, -8
; NOFP16-NEXT: .cfi_offset w30, -16
-; NOFP16-NEXT: and w0, w0, #0xffff
; NOFP16-NEXT: mov x19, x1
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: str w0, [x19]
@@ -41,12 +40,11 @@ define void @v2f16_arg(<2 x half> %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_offset w20, -16
; NOFP16-NEXT: .cfi_offset w21, -24
; NOFP16-NEXT: .cfi_offset w30, -32
-; NOFP16-NEXT: and w0, w0, #0xffff
; NOFP16-NEXT: mov x19, x2
; NOFP16-NEXT: mov w20, w1
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: mov w0, w20
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: stp w21, w0, [x19]
; NOFP16-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
@@ -70,15 +68,15 @@ define void @v3f16_arg(<3 x half> %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w30, -48
; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w1, #0xffff
+; NOFP16-NEXT: mov w0, w1
; NOFP16-NEXT: mov x19, x3
; NOFP16-NEXT: mov w20, w2
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: mov w22, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
+; NOFP16-NEXT: mov w0, w21
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: mov w8, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: mov w0, w20
; NOFP16-NEXT: orr x21, x8, x22, lsl #32
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: str x21, [x19]
@@ -105,20 +103,19 @@ define void @v4f16_arg(<4 x half> %arg, ptr %ptr) #0 {
; NOFP16-NEXT: .cfi_offset w22, -32
; NOFP16-NEXT: .cfi_offset w23, -40
; NOFP16-NEXT: .cfi_offset w30, -48
-; NOFP16-NEXT: and w0, w0, #0xffff
; NOFP16-NEXT: mov x19, x4
; NOFP16-NEXT: mov w20, w3
; NOFP16-NEXT: mov w21, w2
; NOFP16-NEXT: mov w22, w1
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: mov w23, w0
-; NOFP16-NEXT: and w0, w22, #0xffff
+; NOFP16-NEXT: mov w0, w22
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: mov w22, w0
-; NOFP16-NEXT: and w0, w21, #0xffff
+; NOFP16-NEXT: mov w0, w21
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: mov w21, w0
-; NOFP16-NEXT: and w0, w20, #0xffff
+; NOFP16-NEXT: mov w0, w20
; NOFP16-NEXT: bl __extendhfsf2
; NOFP16-NEXT: stp w21, w0, [x19, #8]
; NOFP16-NEXT: stp w23, w22, [x19]
>From 51a48e3ba24c80e63367d0d52709c31f84b52b2a Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 26 Feb 2025 10:54:26 +0000
Subject: [PATCH 2/6] Re-generate test (whitespace change only)
---
llvm/test/CodeGen/ARM/atomic-load-store.ll | 52 +++++++++++-----------
1 file changed, 26 insertions(+), 26 deletions(-)
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 560dfde356c29..d28f100f1346a 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -324,17 +324,17 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
;
; ARMOPTNONE-LABEL: test_old_store_64bit:
; ARMOPTNONE: @ %bb.0:
-; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
-; ARMOPTNONE-NEXT: add r7, sp, #20
-; ARMOPTNONE-NEXT: sub sp, sp, #24
-; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT: dmb ish
-; ARMOPTNONE-NEXT: ldr r1, [r0]
-; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
-; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
+; ARMOPTNONE-NEXT: add r7, sp, #20
+; ARMOPTNONE-NEXT: sub sp, sp, #24
+; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: ldr r1, [r0]
+; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
+; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARMOPTNONE-NEXT: b LBB5_1
; ARMOPTNONE-NEXT: LBB5_1: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
@@ -381,7 +381,7 @@ define void @test_old_store_64bit(ptr %p, i64 %v) {
; ARMOPTNONE-NEXT: LBB5_5: @ %atomicrmw.end
; ARMOPTNONE-NEXT: dmb ish
; ARMOPTNONE-NEXT: sub sp, r7, #20
-; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
+; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
;
; THUMBTWO-LABEL: test_old_store_64bit:
; THUMBTWO: @ %bb.0:
@@ -862,19 +862,19 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
;
; ARMOPTNONE-LABEL: store_atomic_f64__seq_cst:
; ARMOPTNONE: @ %bb.0:
-; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
-; ARMOPTNONE-NEXT: add r7, sp, #20
-; ARMOPTNONE-NEXT: sub sp, sp, #24
-; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
-; ARMOPTNONE-NEXT: vmov d16, r1, r2
-; ARMOPTNONE-NEXT: vmov r1, r2, d16
-; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
-; ARMOPTNONE-NEXT: dmb ish
-; ARMOPTNONE-NEXT: ldr r1, [r0]
-; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
-; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
-; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
+; ARMOPTNONE-NEXT: push {r4, r5, r7, r8, r10, r11, lr}
+; ARMOPTNONE-NEXT: add r7, sp, #20
+; ARMOPTNONE-NEXT: sub sp, sp, #24
+; ARMOPTNONE-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; ARMOPTNONE-NEXT: vmov d16, r1, r2
+; ARMOPTNONE-NEXT: vmov r1, r2, d16
+; ARMOPTNONE-NEXT: str r2, [sp, #8] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r1, [sp, #12] @ 4-byte Spill
+; ARMOPTNONE-NEXT: dmb ish
+; ARMOPTNONE-NEXT: ldr r1, [r0]
+; ARMOPTNONE-NEXT: ldr r0, [r0, #4]
+; ARMOPTNONE-NEXT: str r1, [sp, #16] @ 4-byte Spill
+; ARMOPTNONE-NEXT: str r0, [sp, #20] @ 4-byte Spill
; ARMOPTNONE-NEXT: b LBB13_1
; ARMOPTNONE-NEXT: LBB13_1: @ %atomicrmw.start
; ARMOPTNONE-NEXT: @ =>This Loop Header: Depth=1
@@ -921,7 +921,7 @@ define void @store_atomic_f64__seq_cst(ptr %ptr, double %val1) {
; ARMOPTNONE-NEXT: LBB13_5: @ %atomicrmw.end
; ARMOPTNONE-NEXT: dmb ish
; ARMOPTNONE-NEXT: sub sp, r7, #20
-; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
+; ARMOPTNONE-NEXT: pop {r4, r5, r7, r8, r10, r11, pc}
;
; THUMBTWO-LABEL: store_atomic_f64__seq_cst:
; THUMBTWO: @ %bb.0:
>From afbfb8665090af6b4ad6e4b65d5f4d0746a4b7af Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Tue, 25 Feb 2025 16:32:05 +0000
Subject: [PATCH 3/6] [ARM] Add tests showing missing truncation
---
llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 248 +++++++++++++++++++++
1 file changed, 248 insertions(+)
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
index db71eae97544d..f64b01291990a 100644
--- a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -359,3 +359,251 @@ define i64 @cmpxchg64(ptr %p) {
%res.0 = extractvalue { i64, i1 } %res, 0
ret i64 %res.0
}
+
+define void @trunc_store8(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_store8:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_store_1
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_store8:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: strb r1, [r0]
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bx lr
+ %trunc = trunc i32 %val to i8
+ store atomic i8 %trunc, ptr %p seq_cst, align 1
+ ret void
+}
+
+define i8 @trunc_rmw8(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw8:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_1
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw8:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bl __sync_fetch_and_add_1
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %trunc = trunc i32 %val to i8
+ %v = atomicrmw add ptr %p, i8 %trunc seq_cst, align 1
+ ret i8 %v
+}
+
+define i8 @trunc_rmw8_signed(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw8_signed:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: push {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: mov r4, r1
+; NO-ATOMIC32-NEXT: mov r5, r0
+; NO-ATOMIC32-NEXT: ldrb r2, [r0]
+; NO-ATOMIC32-NEXT: b .LBB18_2
+; NO-ATOMIC32-NEXT: .LBB18_1: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB18_2 Depth=1
+; NO-ATOMIC32-NEXT: mov r0, r5
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
+; NO-ATOMIC32-NEXT: ldr r2, [sp, #4]
+; NO-ATOMIC32-NEXT: cmp r0, #0
+; NO-ATOMIC32-NEXT: bne .LBB18_4
+; NO-ATOMIC32-NEXT: .LBB18_2: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ =>This Inner Loop Header: Depth=1
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: strb r2, [r1]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: sxtb r0, r4
+; NO-ATOMIC32-NEXT: sxtb r6, r2
+; NO-ATOMIC32-NEXT: cmp r6, r0
+; NO-ATOMIC32-NEXT: bgt .LBB18_1
+; NO-ATOMIC32-NEXT: @ %bb.3: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB18_2 Depth=1
+; NO-ATOMIC32-NEXT: mov r2, r4
+; NO-ATOMIC32-NEXT: b .LBB18_1
+; NO-ATOMIC32-NEXT: .LBB18_4: @ %atomicrmw.end
+; NO-ATOMIC32-NEXT: mov r0, r2
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, r5, r6, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw8_signed:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bl __sync_fetch_and_max_1
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %trunc = trunc i32 %val to i8
+ %v = atomicrmw max ptr %p, i8 %trunc seq_cst, align 1
+ ret i8 %v
+}
+
+define i8 @trunc_cmpxchg8(ptr %p, i32 %cmp, i32 %new) {
+; NO-ATOMIC32-LABEL: trunc_cmpxchg8:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, lr}
+; NO-ATOMIC32-NEXT: push {r4, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: add r4, sp, #4
+; NO-ATOMIC32-NEXT: strb r1, [r4]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: mov r1, r4
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, pc}
+;
+; ATOMIC32-LABEL: trunc_cmpxchg8:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_1
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %trunc_cmp = trunc i32 %cmp to i8
+ %trunc_new = trunc i32 %new to i8
+ %res = cmpxchg ptr %p, i8 %trunc_cmp, i8 %trunc_new seq_cst seq_cst, align 1
+ %res.0 = extractvalue { i8, i1 } %res, 0
+ ret i8 %res.0
+}
+
+define void @trunc_store16(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_store16:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_store_2
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_store16:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: strh r1, [r0]
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bx lr
+ %trunc = trunc i32 %val to i16
+ store atomic i16 %trunc, ptr %p seq_cst, align 2
+ ret void
+}
+
+define i16 @trunc_rmw16(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw16:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r7, lr}
+; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: movs r2, #5
+; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_2
+; NO-ATOMIC32-NEXT: pop {r7, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw16:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bl __sync_fetch_and_add_2
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %trunc = trunc i32 %val to i16
+ %v = atomicrmw add ptr %p, i16 %trunc seq_cst, align 2
+ ret i16 %v
+}
+
+define i16 @trunc_rmw16_signed(ptr %p, i32 %val) {
+; NO-ATOMIC32-LABEL: trunc_rmw16_signed:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: push {r4, r5, r6, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: mov r4, r1
+; NO-ATOMIC32-NEXT: mov r5, r0
+; NO-ATOMIC32-NEXT: ldrh r2, [r0]
+; NO-ATOMIC32-NEXT: b .LBB22_2
+; NO-ATOMIC32-NEXT: .LBB22_1: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB22_2 Depth=1
+; NO-ATOMIC32-NEXT: mov r0, r5
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
+; NO-ATOMIC32-NEXT: ldr r2, [sp, #4]
+; NO-ATOMIC32-NEXT: cmp r0, #0
+; NO-ATOMIC32-NEXT: bne .LBB22_4
+; NO-ATOMIC32-NEXT: .LBB22_2: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ =>This Inner Loop Header: Depth=1
+; NO-ATOMIC32-NEXT: add r1, sp, #4
+; NO-ATOMIC32-NEXT: strh r2, [r1]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: sxth r0, r4
+; NO-ATOMIC32-NEXT: sxth r6, r2
+; NO-ATOMIC32-NEXT: cmp r6, r0
+; NO-ATOMIC32-NEXT: bgt .LBB22_1
+; NO-ATOMIC32-NEXT: @ %bb.3: @ %atomicrmw.start
+; NO-ATOMIC32-NEXT: @ in Loop: Header=BB22_2 Depth=1
+; NO-ATOMIC32-NEXT: mov r2, r4
+; NO-ATOMIC32-NEXT: b .LBB22_1
+; NO-ATOMIC32-NEXT: .LBB22_4: @ %atomicrmw.end
+; NO-ATOMIC32-NEXT: mov r0, r2
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, r5, r6, pc}
+;
+; ATOMIC32-LABEL: trunc_rmw16_signed:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bl __sync_fetch_and_max_2
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %trunc = trunc i32 %val to i16
+ %v = atomicrmw max ptr %p, i16 %trunc seq_cst, align 2
+ ret i16 %v
+}
+
+define i16 @trunc_cmpxchg16(ptr %p, i32 %cmp, i32 %new) {
+; NO-ATOMIC32-LABEL: trunc_cmpxchg16:
+; NO-ATOMIC32: @ %bb.0:
+; NO-ATOMIC32-NEXT: .save {r4, lr}
+; NO-ATOMIC32-NEXT: push {r4, lr}
+; NO-ATOMIC32-NEXT: .pad #8
+; NO-ATOMIC32-NEXT: sub sp, #8
+; NO-ATOMIC32-NEXT: add r4, sp, #4
+; NO-ATOMIC32-NEXT: strh r1, [r4]
+; NO-ATOMIC32-NEXT: movs r3, #5
+; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: mov r1, r4
+; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: add sp, #8
+; NO-ATOMIC32-NEXT: pop {r4, pc}
+;
+; ATOMIC32-LABEL: trunc_cmpxchg16:
+; ATOMIC32: @ %bb.0:
+; ATOMIC32-NEXT: .save {r7, lr}
+; ATOMIC32-NEXT: push {r7, lr}
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_2
+; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: pop {r7, pc}
+ %trunc_cmp = trunc i32 %cmp to i16
+ %trunc_new = trunc i32 %new to i16
+ %res = cmpxchg ptr %p, i16 %trunc_cmp, i16 %trunc_new seq_cst seq_cst, align 2
+ %res.0 = extractvalue { i16, i1 } %res, 0
+ ret i16 %res.0
+}
>From 1bae5be62f58cafe273d2fe09c715ccffed31114 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 26 Feb 2025 10:57:48 +0000
Subject: [PATCH 4/6] [ARM] Add signext/zeroext to libcalls in AtomicExpand
The AArch32 PCS requires the caller to sign- or zero-extend small
integer types to 32-bit before passing them to a function. For most
calls this is handled by clang by setting the zeroext or signext
parameter attributes, but we were adding calls to library functions in
AtomicExpandPass without doing this.
Fixes #61880
---
llvm/lib/CodeGen/AtomicExpandPass.cpp | 13 +++++++
llvm/test/CodeGen/ARM/atomic-load-store.ll | 10 ++++-
llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 44 +++++++++++++---------
3 files changed, 47 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index a75fa688d87a8..b13d380ae7c7e 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1999,6 +1999,19 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
Value *IntValue =
Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
Args.push_back(IntValue);
+
+ // Set the zeroext/signext attributes on the parameter if needed to match
+ // the target's ABI.
+ if (TLI->shouldExtendTypeInLibCall(
+ TLI->getMemValueType(DL, SizedIntTy))) {
+ // The only atomic operations affected by signedness are min/max, and
+ // we don't have __atomic_ libcalls for them, so IsSigned is always
+ // false.
+ if (TLI->shouldSignExtendTypeInLibCall(SizedIntTy, false /*IsSigned*/))
+ Attr = Attr.addParamAttribute(Ctx, Args.size() - 1, Attribute::SExt);
+ else
+ Attr = Attr.addParamAttribute(Ctx, Args.size() - 1, Attribute::ZExt);
+ }
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
AllocaValue->setAlignment(AllocaAlignment);
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index d28f100f1346a..21eee8ce5837b 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -143,7 +143,7 @@ define void @test3(ptr %ptr1, ptr %ptr2) {
; ARMV4-NEXT: mov r4, r1
; ARMV4-NEXT: mov r1, #0
; ARMV4-NEXT: bl __atomic_load_1
-; ARMV4-NEXT: mov r1, r0
+; ARMV4-NEXT: and r1, r0, #255
; ARMV4-NEXT: mov r0, r4
; ARMV4-NEXT: mov r2, #0
; ARMV4-NEXT: bl __atomic_store_1
@@ -214,7 +214,7 @@ define void @test4(ptr %ptr1, ptr %ptr2) {
; ARMV4-NEXT: mov r4, r1
; ARMV4-NEXT: mov r1, #5
; ARMV4-NEXT: bl __atomic_load_1
-; ARMV4-NEXT: mov r1, r0
+; ARMV4-NEXT: and r1, r0, #255
; ARMV4-NEXT: mov r0, r4
; ARMV4-NEXT: mov r2, #5
; ARMV4-NEXT: bl __atomic_store_1
@@ -698,6 +698,9 @@ define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
; ARMV4-LABEL: store_atomic_f16__seq_cst:
; ARMV4: @ %bb.0:
; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #255
+; ARMV4-NEXT: orr r2, r2, #65280
+; ARMV4-NEXT: and r1, r1, r2
; ARMV4-NEXT: mov r2, #5
; ARMV4-NEXT: bl __atomic_store_2
; ARMV4-NEXT: pop {r11, lr}
@@ -759,6 +762,9 @@ define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
; ARMV4-LABEL: store_atomic_bf16__seq_cst:
; ARMV4: @ %bb.0:
; ARMV4-NEXT: push {r11, lr}
+; ARMV4-NEXT: mov r2, #255
+; ARMV4-NEXT: orr r2, r2, #65280
+; ARMV4-NEXT: and r1, r1, r2
; ARMV4-NEXT: mov r2, #5
; ARMV4-NEXT: bl __atomic_store_2
; ARMV4-NEXT: pop {r11, lr}
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
index f64b01291990a..f626c1ab12319 100644
--- a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -365,6 +365,7 @@ define void @trunc_store8(ptr %p, i32 %val) {
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxtb r1, r1
; NO-ATOMIC32-NEXT: movs r2, #5
; NO-ATOMIC32-NEXT: bl __atomic_store_1
; NO-ATOMIC32-NEXT: pop {r7, pc}
@@ -385,6 +386,7 @@ define i8 @trunc_rmw8(ptr %p, i32 %val) {
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxtb r1, r1
; NO-ATOMIC32-NEXT: movs r2, #5
; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_1
; NO-ATOMIC32-NEXT: pop {r7, pc}
@@ -411,31 +413,32 @@ define i8 @trunc_rmw8_signed(ptr %p, i32 %val) {
; NO-ATOMIC32-NEXT: sub sp, #8
; NO-ATOMIC32-NEXT: mov r4, r1
; NO-ATOMIC32-NEXT: mov r5, r0
-; NO-ATOMIC32-NEXT: ldrb r2, [r0]
+; NO-ATOMIC32-NEXT: ldrb r0, [r0]
; NO-ATOMIC32-NEXT: b .LBB18_2
; NO-ATOMIC32-NEXT: .LBB18_1: @ %atomicrmw.start
; NO-ATOMIC32-NEXT: @ in Loop: Header=BB18_2 Depth=1
+; NO-ATOMIC32-NEXT: uxtb r2, r0
; NO-ATOMIC32-NEXT: mov r0, r5
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
-; NO-ATOMIC32-NEXT: ldr r2, [sp, #4]
-; NO-ATOMIC32-NEXT: cmp r0, #0
+; NO-ATOMIC32-NEXT: mov r1, r0
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: cmp r1, #0
; NO-ATOMIC32-NEXT: bne .LBB18_4
; NO-ATOMIC32-NEXT: .LBB18_2: @ %atomicrmw.start
; NO-ATOMIC32-NEXT: @ =>This Inner Loop Header: Depth=1
; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: strb r2, [r1]
+; NO-ATOMIC32-NEXT: strb r0, [r1]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: sxtb r0, r4
-; NO-ATOMIC32-NEXT: sxtb r6, r2
-; NO-ATOMIC32-NEXT: cmp r6, r0
+; NO-ATOMIC32-NEXT: sxtb r2, r4
+; NO-ATOMIC32-NEXT: sxtb r6, r0
+; NO-ATOMIC32-NEXT: cmp r6, r2
; NO-ATOMIC32-NEXT: bgt .LBB18_1
; NO-ATOMIC32-NEXT: @ %bb.3: @ %atomicrmw.start
; NO-ATOMIC32-NEXT: @ in Loop: Header=BB18_2 Depth=1
-; NO-ATOMIC32-NEXT: mov r2, r4
+; NO-ATOMIC32-NEXT: mov r0, r4
; NO-ATOMIC32-NEXT: b .LBB18_1
; NO-ATOMIC32-NEXT: .LBB18_4: @ %atomicrmw.end
-; NO-ATOMIC32-NEXT: mov r0, r2
; NO-ATOMIC32-NEXT: add sp, #8
; NO-ATOMIC32-NEXT: pop {r4, r5, r6, pc}
;
@@ -463,6 +466,7 @@ define i8 @trunc_cmpxchg8(ptr %p, i32 %cmp, i32 %new) {
; NO-ATOMIC32-NEXT: strb r1, [r4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: uxtb r2, r2
; NO-ATOMIC32-NEXT: mov r1, r4
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_1
; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
@@ -489,6 +493,7 @@ define void @trunc_store16(ptr %p, i32 %val) {
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxth r1, r1
; NO-ATOMIC32-NEXT: movs r2, #5
; NO-ATOMIC32-NEXT: bl __atomic_store_2
; NO-ATOMIC32-NEXT: pop {r7, pc}
@@ -509,6 +514,7 @@ define i16 @trunc_rmw16(ptr %p, i32 %val) {
; NO-ATOMIC32: @ %bb.0:
; NO-ATOMIC32-NEXT: .save {r7, lr}
; NO-ATOMIC32-NEXT: push {r7, lr}
+; NO-ATOMIC32-NEXT: uxth r1, r1
; NO-ATOMIC32-NEXT: movs r2, #5
; NO-ATOMIC32-NEXT: bl __atomic_fetch_add_2
; NO-ATOMIC32-NEXT: pop {r7, pc}
@@ -535,31 +541,32 @@ define i16 @trunc_rmw16_signed(ptr %p, i32 %val) {
; NO-ATOMIC32-NEXT: sub sp, #8
; NO-ATOMIC32-NEXT: mov r4, r1
; NO-ATOMIC32-NEXT: mov r5, r0
-; NO-ATOMIC32-NEXT: ldrh r2, [r0]
+; NO-ATOMIC32-NEXT: ldrh r0, [r0]
; NO-ATOMIC32-NEXT: b .LBB22_2
; NO-ATOMIC32-NEXT: .LBB22_1: @ %atomicrmw.start
; NO-ATOMIC32-NEXT: @ in Loop: Header=BB22_2 Depth=1
+; NO-ATOMIC32-NEXT: uxth r2, r0
; NO-ATOMIC32-NEXT: mov r0, r5
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
-; NO-ATOMIC32-NEXT: ldr r2, [sp, #4]
-; NO-ATOMIC32-NEXT: cmp r0, #0
+; NO-ATOMIC32-NEXT: mov r1, r0
+; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
+; NO-ATOMIC32-NEXT: cmp r1, #0
; NO-ATOMIC32-NEXT: bne .LBB22_4
; NO-ATOMIC32-NEXT: .LBB22_2: @ %atomicrmw.start
; NO-ATOMIC32-NEXT: @ =>This Inner Loop Header: Depth=1
; NO-ATOMIC32-NEXT: add r1, sp, #4
-; NO-ATOMIC32-NEXT: strh r2, [r1]
+; NO-ATOMIC32-NEXT: strh r0, [r1]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
-; NO-ATOMIC32-NEXT: sxth r0, r4
-; NO-ATOMIC32-NEXT: sxth r6, r2
-; NO-ATOMIC32-NEXT: cmp r6, r0
+; NO-ATOMIC32-NEXT: sxth r2, r4
+; NO-ATOMIC32-NEXT: sxth r6, r0
+; NO-ATOMIC32-NEXT: cmp r6, r2
; NO-ATOMIC32-NEXT: bgt .LBB22_1
; NO-ATOMIC32-NEXT: @ %bb.3: @ %atomicrmw.start
; NO-ATOMIC32-NEXT: @ in Loop: Header=BB22_2 Depth=1
-; NO-ATOMIC32-NEXT: mov r2, r4
+; NO-ATOMIC32-NEXT: mov r0, r4
; NO-ATOMIC32-NEXT: b .LBB22_1
; NO-ATOMIC32-NEXT: .LBB22_4: @ %atomicrmw.end
-; NO-ATOMIC32-NEXT: mov r0, r2
; NO-ATOMIC32-NEXT: add sp, #8
; NO-ATOMIC32-NEXT: pop {r4, r5, r6, pc}
;
@@ -587,6 +594,7 @@ define i16 @trunc_cmpxchg16(ptr %p, i32 %cmp, i32 %new) {
; NO-ATOMIC32-NEXT: strh r1, [r4]
; NO-ATOMIC32-NEXT: movs r3, #5
; NO-ATOMIC32-NEXT: str r3, [sp]
+; NO-ATOMIC32-NEXT: uxth r2, r2
; NO-ATOMIC32-NEXT: mov r1, r4
; NO-ATOMIC32-NEXT: bl __atomic_compare_exchange_2
; NO-ATOMIC32-NEXT: ldr r0, [sp, #4]
>From 613550400921d465e6189d9b0ce94ef5ffbd3e4f Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 26 Feb 2025 14:41:40 +0000
Subject: [PATCH 5/6] [ARM][DAGISel] Use correct ABI for atomic functions
The AArch32 PCS passes small integer arguments in registers by zero- or
sign-extending them in the caller, but we were previously generating
calls to the __atomic and __sync functions which left other values in
the high bits. This is important in practice for the atomic min/max
functions, which have signed versions which expect the value to have
been correctly sign-extended.
Fixes #61880.
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 37 +++++++++++++++----
llvm/test/CodeGen/ARM/atomic-cmpxchg.ll | 19 +++++-----
llvm/test/CodeGen/ARM/atomic-load-store.ll | 4 +-
llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll | 8 ++++
4 files changed, 51 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f56097fdbb51a..9cd4e42cfd062 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4386,23 +4386,46 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
AtomicOrdering Order = cast<AtomicSDNode>(Node)->getMergedOrdering();
RTLIB::Libcall LC = RTLIB::getOUTLINE_ATOMIC(Opc, Order, VT);
EVT RetVT = Node->getValueType(0);
+ SDValue ChainIn = Node->getOperand(0);
+ SDValue Pointer = Node->getOperand(1);
+ SDLoc dl(Node);
SmallVector<SDValue, 4> Ops;
+
+ // Zero/sign extend small operands if required by the target's ABI.
+ SmallVector<SDValue, 4> ExtendedOps;
+ for (auto Op = Node->op_begin() + 2, E = Node->op_end(); Op != E; ++Op) {
+ if (TLI.shouldExtendTypeInLibCall(VT)) {
+ bool IsSigned =
+ Opc == ISD::ATOMIC_LOAD_MIN || Opc == ISD::ATOMIC_LOAD_MAX;
+ if (TLI.shouldSignExtendTypeInLibCall(
+ EVT(VT).getTypeForEVT(*DAG.getContext()), IsSigned))
+ ExtendedOps.push_back(DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Op->getValueType(), *Op,
+ DAG.getValueType(VT)));
+ else
+ ExtendedOps.push_back(DAG.getZeroExtendInReg(*Op, dl, VT));
+
+ } else {
+ ExtendedOps.push_back(*Op);
+ }
+ }
+
if (TLI.getLibcallName(LC)) {
// If outline atomic available, prepare its arguments and expand.
- Ops.append(Node->op_begin() + 2, Node->op_end());
- Ops.push_back(Node->getOperand(1));
+ Ops.append(ExtendedOps.begin(), ExtendedOps.end());
+ Ops.push_back(Pointer);
} else {
LC = RTLIB::getSYNC(Opc, VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Unexpected atomic op or value type!");
// Arguments for expansion to sync libcall
- Ops.append(Node->op_begin() + 1, Node->op_end());
+ Ops.push_back(Pointer);
+ Ops.append(ExtendedOps.begin(), ExtendedOps.end());
}
- std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RetVT,
- Ops, CallOptions,
- SDLoc(Node),
- Node->getOperand(0));
+
+ std::pair<SDValue, SDValue> Tmp =
+ TLI.makeLibCall(DAG, LC, RetVT, Ops, CallOptions, dl, ChainIn);
Results.push_back(Tmp.first);
Results.push_back(Tmp.second);
break;
diff --git a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
index 4bf42d4ac9629..d231687c4f176 100644
--- a/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
+++ b/llvm/test/CodeGen/ARM/atomic-cmpxchg.ll
@@ -12,10 +12,10 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
; CHECK-ARM-LABEL: test_cmpxchg_res_i8:
; CHECK-ARM: .save {r4, lr}
; CHECK-ARM-NEXT: push {r4, lr}
-; CHECK-ARM-NEXT: mov r4, r1
+; CHECK-ARM-NEXT: and r4, r1, #255
+; CHECK-ARM-NEXT: mov r1, r4
; CHECK-ARM-NEXT: bl __sync_val_compare_and_swap_1
-; CHECK-ARM-NEXT: and r1, r4, #255
-; CHECK-ARM-NEXT: sub r0, r0, r1
+; CHECK-ARM-NEXT: sub r0, r0, r4
; CHECK-ARM-NEXT: rsbs r1, r0, #0
; CHECK-ARM-NEXT: adc r0, r0, r1
; CHECK-ARM-NEXT: pop {r4, lr}
@@ -25,10 +25,11 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
; CHECK-THUMB: .save {r4, lr}
; CHECK-THUMB-NEXT: push {r4, lr}
; CHECK-THUMB-NEXT: movs r4, r1
-; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1
; CHECK-THUMB-NEXT: movs r1, #255
-; CHECK-THUMB-NEXT: ands r1, r4
-; CHECK-THUMB-NEXT: subs r1, r0, r1
+; CHECK-THUMB-NEXT: ands r4, r1
+; CHECK-THUMB-NEXT: movs r1, r4
+; CHECK-THUMB-NEXT: bl __sync_val_compare_and_swap_1
+; CHECK-THUMB-NEXT: subs r1, r0, r4
; CHECK-THUMB-NEXT: rsbs r0, r1, #0
; CHECK-THUMB-NEXT: adcs r0, r1
; CHECK-THUMB-NEXT: pop {r4}
@@ -52,10 +53,10 @@ define zeroext i1 @test_cmpxchg_res_i8(ptr %addr, i8 %desired, i8 zeroext %new)
; CHECK-THUMBV6-LABEL: test_cmpxchg_res_i8:
; CHECK-THUMBV6: .save {r4, lr}
; CHECK-THUMBV6-NEXT: push {r4, lr}
-; CHECK-THUMBV6-NEXT: mov r4, r1
+; CHECK-THUMBV6-NEXT: uxtb r4, r1
+; CHECK-THUMBV6-NEXT: mov r1, r4
; CHECK-THUMBV6-NEXT: bl __sync_val_compare_and_swap_1
-; CHECK-THUMBV6-NEXT: uxtb r1, r4
-; CHECK-THUMBV6-NEXT: subs r1, r0, r1
+; CHECK-THUMBV6-NEXT: subs r1, r0, r4
; CHECK-THUMBV6-NEXT: rsbs r0, r1, #0
; CHECK-THUMBV6-NEXT: adcs r0, r1
; CHECK-THUMBV6-NEXT: pop {r4, pc}
diff --git a/llvm/test/CodeGen/ARM/atomic-load-store.ll b/llvm/test/CodeGen/ARM/atomic-load-store.ll
index 21eee8ce5837b..63dd68e8e607b 100644
--- a/llvm/test/CodeGen/ARM/atomic-load-store.ll
+++ b/llvm/test/CodeGen/ARM/atomic-load-store.ll
@@ -203,7 +203,7 @@ define void @test4(ptr %ptr1, ptr %ptr2) {
; THUMBONE-NEXT: movs r1, #0
; THUMBONE-NEXT: mov r2, r1
; THUMBONE-NEXT: bl __sync_val_compare_and_swap_1
-; THUMBONE-NEXT: mov r1, r0
+; THUMBONE-NEXT: uxtb r1, r0
; THUMBONE-NEXT: mov r0, r4
; THUMBONE-NEXT: bl __sync_lock_test_and_set_1
; THUMBONE-NEXT: pop {r4, pc}
@@ -692,6 +692,7 @@ define void @store_atomic_f16__seq_cst(ptr %ptr, half %val1) {
; THUMBONE-LABEL: store_atomic_f16__seq_cst:
; THUMBONE: @ %bb.0:
; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: uxth r1, r1
; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
; THUMBONE-NEXT: pop {r7, pc}
;
@@ -756,6 +757,7 @@ define void @store_atomic_bf16__seq_cst(ptr %ptr, bfloat %val1) {
; THUMBONE-LABEL: store_atomic_bf16__seq_cst:
; THUMBONE: @ %bb.0:
; THUMBONE-NEXT: push {r7, lr}
+; THUMBONE-NEXT: uxth r1, r1
; THUMBONE-NEXT: bl __sync_lock_test_and_set_2
; THUMBONE-NEXT: pop {r7, pc}
;
diff --git a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
index f626c1ab12319..9743761c08e22 100644
--- a/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
+++ b/llvm/test/CodeGen/ARM/thumbv6m-atomic32.ll
@@ -396,6 +396,7 @@ define i8 @trunc_rmw8(ptr %p, i32 %val) {
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxtb r1, r1
; ATOMIC32-NEXT: bl __sync_fetch_and_add_1
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
@@ -447,6 +448,7 @@ define i8 @trunc_rmw8_signed(ptr %p, i32 %val) {
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: sxtb r1, r1
; ATOMIC32-NEXT: bl __sync_fetch_and_max_1
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
@@ -478,6 +480,8 @@ define i8 @trunc_cmpxchg8(ptr %p, i32 %cmp, i32 %new) {
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxtb r1, r1
+; ATOMIC32-NEXT: uxtb r2, r2
; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_1
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
@@ -524,6 +528,7 @@ define i16 @trunc_rmw16(ptr %p, i32 %val) {
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxth r1, r1
; ATOMIC32-NEXT: bl __sync_fetch_and_add_2
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
@@ -575,6 +580,7 @@ define i16 @trunc_rmw16_signed(ptr %p, i32 %val) {
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: sxth r1, r1
; ATOMIC32-NEXT: bl __sync_fetch_and_max_2
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
@@ -606,6 +612,8 @@ define i16 @trunc_cmpxchg16(ptr %p, i32 %cmp, i32 %new) {
; ATOMIC32-NEXT: .save {r7, lr}
; ATOMIC32-NEXT: push {r7, lr}
; ATOMIC32-NEXT: dmb sy
+; ATOMIC32-NEXT: uxth r1, r1
+; ATOMIC32-NEXT: uxth r2, r2
; ATOMIC32-NEXT: bl __sync_val_compare_and_swap_2
; ATOMIC32-NEXT: dmb sy
; ATOMIC32-NEXT: pop {r7, pc}
>From f7de953db7216a2b8048ba65b6b6536be7479008 Mon Sep 17 00:00:00 2001
From: Oliver Stannard <oliver.stannard at arm.com>
Date: Wed, 26 Feb 2025 15:20:03 +0000
Subject: [PATCH 6/6] clang-format
---
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 9a009f0eb6980..2a015f8ed67bc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1379,9 +1379,7 @@ class AArch64TargetLowering : public TargetLowering {
return VecOp.getOpcode() == ISD::SETCC;
}
- bool shouldExtendTypeInLibCall(EVT Type) const override {
- return false;
- }
+ bool shouldExtendTypeInLibCall(EVT Type) const override { return false; }
};
namespace AArch64 {
More information about the llvm-commits
mailing list