[llvm] [RISCV][GlobalISel] Lower G_ATOMICRMW_SUB via G_ATOMICRMW_ADD (PR #155972)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 22:07:54 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Kane Wang (ReVe1uv)
<details>
<summary>Changes</summary>
RISCV does not provide a native atomic subtract instruction, so this patch lowers `G_ATOMICRMW_SUB` by negating the RHS value and performing an atomic add. The legalization rules in `RISCVLegalizerInfo` are updated accordingly, with libcall fallbacks when `StdExtA` is not available, and intrinsic legalization is extended to support `riscv_masked_atomicrmw_sub`.
For example, lowering
`%1 = atomicrmw sub ptr %a, i32 1 seq_cst`
on riscv32a produces:
```
li a1, -1
amoadd.w.aqrl a0, a1, (a0)
```
On riscv64a, where the RHS type is narrower than XLEN, it currently produces:
```
li a1, 1
neg a1, a1
amoadd.w.aqrl a0, a1, (a0)
```
There is still a constant-folding or InstCombiner gap. For instance, lowering
```
%b = sub i32 %x, %y
%1 = atomicrmw sub ptr %a, i32 %b seq_cst
```
generates:
```
subw a1, a1, a2
neg a1, a1
amoadd.w.aqrl a0, a1, (a0)
```
This sequence could be optimized further to eliminate the redundant neg. Addressing this may require improvements in the Combiner or Peephole Optimizer in future work.
---
Patch is 96.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155972.diff
13 Files Affected:
- (modified) llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp (+14)
- (modified) llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp (+6)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll (+930)
- (removed) llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add.ll (-299)
- (removed) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-rv32.mir (-73)
- (removed) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-rv64.mir (-96)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir (+154)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir (+204)
- (modified) llvm/test/CodeGen/RISCV/GlobalISel/legalizer-info-validation.mir (+2-2)
- (removed) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomicrmw-add-rv32.mir (-97)
- (removed) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomicrmw-add-rv64.mir (-128)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomicrmw-add-sub-rv32.mir (+206)
- (added) llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-atomicrmw-add-sub-rv64.mir (+274)
``````````diff
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 008c18837a522..1600594c955d1 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -37,6 +38,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <numeric>
#include <optional>
@@ -4773,6 +4775,18 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
return lowerVectorReduction(MI);
case G_VAARG:
return lowerVAArg(MI);
+ case G_ATOMICRMW_SUB: {
+ auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ auto VNeg = MIRBuilder.buildNeg(ValLLT, Val);
+ auto NewRMW =
+ MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, RetLLT, Mem, VNeg, *MMO);
+
+ MIRBuilder.buildCopy(Ret, NewRMW);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index d6ae58ac890aa..ff733334f5d60 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -699,6 +699,11 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
.clampScalar(0, sXLen, sXLen);
+ getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
+ .clampScalar(0, sXLen, sXLen)
+ .lower();
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -737,6 +742,7 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return true;
}
case Intrinsic::riscv_masked_atomicrmw_add:
+ case Intrinsic::riscv_masked_atomicrmw_sub:
return true;
}
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll
new file mode 100644
index 0000000000000..21b2bbfc59241
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomicrmw-add-sub.ll
@@ -0,0 +1,930 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA-ZABHA
+; RUN: llc -mtriple=riscv32 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32IA
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA-ZABHA
+; RUN: llc -mtriple=riscv64 -mattr=+a -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64IA
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64I
+
+define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i8:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a2, 255
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a2, a2, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-NEXT: add a5, a4, a1
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: and a5, a5, a2
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB0_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i8:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a2, 255
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: zext.b a1, a1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a2, a2, a0
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-NEXT: add a5, a4, a1
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: and a5, a5, a2
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB0_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i8 %rhs seq_cst
+ ret i8 %res
+}
+
+define i16 @atomicrmw_add_i16(ptr %ptr, i16 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i16:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a2, 16
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a4, a2, a0
+; RV32IA-NEXT: and a1, a1, a2
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a3)
+; RV32IA-NEXT: add a5, a2, a1
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: and a5, a5, a4
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB1_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a2, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i16:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lui a2, 16
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a4, a2, a0
+; RV64IA-NEXT: and a1, a1, a2
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a2, (a3)
+; RV64IA-NEXT: add a5, a2, a1
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: and a5, a5, a4
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB1_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a2, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i16 %rhs seq_cst
+ ret i16 %res
+}
+
+define i32 @atomicrmw_add_i32(ptr %ptr, i32 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i32:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_add_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i32:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i64 @atomicrmw_add_i64(ptr %ptr, i64 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_add_i64:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: addi sp, sp, -16
+; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-ZABHA-NEXT: li a3, 5
+; RV32IA-ZABHA-NEXT: call __atomic_fetch_add_8
+; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-ZABHA-NEXT: addi sp, sp, 16
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_add_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_add_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_add_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_add_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_add_i64:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_add_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_add_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_add_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw add ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i8:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i8:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a2, 255
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: zext.b a1, a1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a2, a2, a0
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a4, (a3)
+; RV32IA-NEXT: sub a5, a4, a1
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: and a5, a5, a2
+; RV32IA-NEXT: xor a5, a4, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB4_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a4, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i8:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_1
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i8:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i8:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a2, 255
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: zext.b a1, a1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a2, a2, a0
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a4, (a3)
+; RV64IA-NEXT: sub a5, a4, a1
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: and a5, a5, a2
+; RV64IA-NEXT: xor a5, a4, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB4_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a4, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i8:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_1
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i8 %rhs seq_cst
+ ret i8 %res
+}
+
+define i16 @atomicrmw_sub_i16(ptr %ptr, i16 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i16:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i16:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: lui a2, 16
+; RV32IA-NEXT: andi a3, a0, -4
+; RV32IA-NEXT: andi a0, a0, 3
+; RV32IA-NEXT: addi a2, a2, -1
+; RV32IA-NEXT: slli a0, a0, 3
+; RV32IA-NEXT: sll a4, a2, a0
+; RV32IA-NEXT: and a1, a1, a2
+; RV32IA-NEXT: sll a1, a1, a0
+; RV32IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NEXT: lr.w.aqrl a2, (a3)
+; RV32IA-NEXT: sub a5, a2, a1
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: and a5, a5, a4
+; RV32IA-NEXT: xor a5, a2, a5
+; RV32IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV32IA-NEXT: bnez a5, .LBB5_1
+; RV32IA-NEXT: # %bb.2:
+; RV32IA-NEXT: srl a0, a2, a0
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i16:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_2
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i16:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i16:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: lui a2, 16
+; RV64IA-NEXT: andi a3, a0, -4
+; RV64IA-NEXT: andi a0, a0, 3
+; RV64IA-NEXT: addi a2, a2, -1
+; RV64IA-NEXT: slli a0, a0, 3
+; RV64IA-NEXT: sllw a4, a2, a0
+; RV64IA-NEXT: and a1, a1, a2
+; RV64IA-NEXT: sllw a1, a1, a0
+; RV64IA-NEXT: .LBB5_1: # =>This Inner Loop Header: Depth=1
+; RV64IA-NEXT: lr.w.aqrl a2, (a3)
+; RV64IA-NEXT: sub a5, a2, a1
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: and a5, a5, a4
+; RV64IA-NEXT: xor a5, a2, a5
+; RV64IA-NEXT: sc.w.rl a5, a5, (a3)
+; RV64IA-NEXT: bnez a5, .LBB5_1
+; RV64IA-NEXT: # %bb.2:
+; RV64IA-NEXT: srlw a0, a2, a0
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i16:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_2
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i16 %rhs seq_cst
+ ret i16 %res
+}
+
+define i32 @atomicrmw_sub_i32(ptr %ptr, i32 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i32:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: neg a1, a1
+; RV32IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: neg a1, a1
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_4
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i32:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: neg a1, a1
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_4
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i32 %rhs seq_cst
+ ret i32 %res
+}
+
+define i64 @atomicrmw_sub_i64(ptr %ptr, i64 %rhs) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i64:
+; RV32IA-ZABHA: # %bb.0:
+; RV32IA-ZABHA-NEXT: addi sp, sp, -16
+; RV32IA-ZABHA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-ZABHA-NEXT: li a3, 5
+; RV32IA-ZABHA-NEXT: call __atomic_fetch_sub_8
+; RV32IA-ZABHA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-ZABHA-NEXT: addi sp, sp, 16
+; RV32IA-ZABHA-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_sub_8
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV32I-LABEL: atomicrmw_sub_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_sub_8
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV64IA-ZABHA-LABEL: atomicrmw_sub_i64:
+; RV64IA-ZABHA: # %bb.0:
+; RV64IA-ZABHA-NEXT: neg a1, a1
+; RV64IA-ZABHA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-ZABHA-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: neg a1, a1
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_8
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+ %res = atomicrmw sub ptr %ptr, i64 %rhs seq_cst
+ ret i64 %res
+}
+
+define i16 @atomicrmw_sub_i16_constant(ptr %a) nounwind {
+; RV32IA-ZABHA-LABEL: atomicrmw_sub_i16_constant:
+; RV32IA-...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155972
More information about the llvm-commits
mailing list