[llvm] b423e1f - [SDAG][RISCV] Avoid neg instructions when lowering atomic_load_sub with a constant rhs
Yingwei Zheng via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 16 02:10:50 PDT 2023
Author: Yingwei Zheng
Date: 2023-09-16T17:09:41+08:00
New Revision: b423e1f05dc3dc7ca05d1d788d0e6b5d8423b5b6
URL: https://github.com/llvm/llvm-project/commit/b423e1f05dc3dc7ca05d1d788d0e6b5d8423b5b6
DIFF: https://github.com/llvm/llvm-project/commit/b423e1f05dc3dc7ca05d1d788d0e6b5d8423b5b6.diff
LOG: [SDAG][RISCV] Avoid neg instructions when lowering atomic_load_sub with a constant rhs
This patch avoids creating (sub x0, rhs) when lowering atomic_load_sub with a constant rhs.
Comparison with GCC: https://godbolt.org/z/c5zPdP7j4
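As a concrete illustration (taken from the RV32IA/RV64IA checks in the new test added below), an `atomicrmw sub` by the constant 1 now materializes -1 directly and feeds it to the AMO, whereas the removed patterns negated the incoming value with a separate (SUB X0, rhs):

  ; atomicrmw sub ptr %a, i32 1 seq_cst
  li            a1, -1
  amoadd.w.aqrl a0, a1, (a0)
  ret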
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D158673
Added:
llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/ARM/ARMISelLowering.cpp
llvm/lib/Target/Mips/Mips16ISelLowering.cpp
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoA.td
llvm/test/CodeGen/Mips/atomicops.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index af5fad7ba311554..aa286d5f55d615e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3133,6 +3133,23 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Res.getValue(1));
break;
}
+ case ISD::ATOMIC_LOAD_SUB: {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue RHS = Node->getOperand(2);
+ AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+ if (RHS->getOpcode() == ISD::SIGN_EXTEND_INREG &&
+ cast<VTSDNode>(RHS->getOperand(1))->getVT() == AN->getMemoryVT())
+ RHS = RHS->getOperand(0);
+ SDValue NewRHS =
+ DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
+ SDValue Res = DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, AN->getMemoryVT(),
+ Node->getOperand(0), Node->getOperand(1),
+ NewRHS, AN->getMemOperand());
+ Results.push_back(Res);
+ Results.push_back(Res.getValue(1));
+ break;
+ }
case ISD::DYNAMIC_STACKALLOC:
ExpandDYNAMIC_STACKALLOC(Node, Results);
break;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c846a0ae929bf52..5cc001c44e7a24f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -795,8 +795,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
+ if (!Subtarget->hasLSE() && !Subtarget->outlineAtomics()) {
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, LibCall);
+ } else {
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
+ }
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
@@ -6113,8 +6118,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::VECREDUCE_FMAXIMUM:
case ISD::VECREDUCE_FMINIMUM:
return LowerVECREDUCE(Op, DAG);
- case ISD::ATOMIC_LOAD_SUB:
- return LowerATOMIC_LOAD_SUB(Op, DAG);
case ISD::ATOMIC_LOAD_AND:
return LowerATOMIC_LOAD_AND(Op, DAG);
case ISD::DYNAMIC_STACKALLOC:
@@ -13748,23 +13751,6 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
}
}
-SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
- SelectionDAG &DAG) const {
- auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
- if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
- return SDValue();
-
- // LSE has an atomic load-add instruction, but not a load-sub.
- SDLoc dl(Op);
- MVT VT = Op.getSimpleValueType();
- SDValue RHS = Op.getOperand(2);
- AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
- RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
- return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
- Op.getOperand(0), Op.getOperand(1), RHS,
- AN->getMemOperand());
-}
-
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index f51a0ebc2cac395..e015f68dabc6977 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1097,7 +1097,6 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerVSCALE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerATOMIC_LOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerATOMIC_LOAD_AND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerWindowsDYNAMIC_STACKALLOC(SDValue Op, SDValue Chain,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 21489a61e576f5d..e72ba77d56a1e60 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1349,19 +1349,19 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
- // Set them all for expansion, which will force libcalls.
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ // Set them all for libcall, which will force libcalls.
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
if (!InsertFencesForAtomic) {
diff --git a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
index ea35608e6a7b774..d97f59b5b2c76bd 100644
--- a/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
+++ b/llvm/lib/Target/Mips/Mips16ISelLowering.cpp
@@ -127,19 +127,19 @@ Mips16TargetLowering::Mips16TargetLowering(const MipsTargetMachine &TM,
if (!Subtarget.useSoftFloat())
setMips16HardFloatLibCalls();
- setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
- setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
- setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
+ setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, LibCall);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, LibCall);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, LibCall);
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::ROTR, MVT::i64, Expand);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c5c68961a029a35..1febc0216f5850c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1229,14 +1229,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
}
+ if (Subtarget.hasStdExtA())
+ setOperationAction(ISD::ATOMIC_LOAD_SUB, XLenVT, Expand);
+
if (Subtarget.hasForcedAtomics()) {
- // Set atomic rmw/cas operations to expand to force __sync libcalls.
+ // Force __sync libcalls to be emitted for atomic rmw/cas operations.
setOperationAction(
{ISD::ATOMIC_CMP_SWAP, ISD::ATOMIC_SWAP, ISD::ATOMIC_LOAD_ADD,
ISD::ATOMIC_LOAD_SUB, ISD::ATOMIC_LOAD_AND, ISD::ATOMIC_LOAD_OR,
ISD::ATOMIC_LOAD_XOR, ISD::ATOMIC_LOAD_NAND, ISD::ATOMIC_LOAD_MIN,
ISD::ATOMIC_LOAD_MAX, ISD::ATOMIC_LOAD_UMIN, ISD::ATOMIC_LOAD_UMAX},
- XLenVT, Expand);
+ XLenVT, LibCall);
}
if (Subtarget.hasVendorXTHeadMemIdx()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
index 1c305f17590acc0..d7314866789ce47 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td
@@ -168,17 +168,6 @@ defm : AMOPat<"atomic_load_min_32", "AMOMIN_W">;
defm : AMOPat<"atomic_load_umax_32", "AMOMAXU_W">;
defm : AMOPat<"atomic_load_umin_32", "AMOMINU_W">;
-def : Pat<(XLenVT (atomic_load_sub_32_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_W GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_release GPR:$addr, GPR:$incr)),
- (AMOADD_W_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(XLenVT (atomic_load_sub_32_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_W_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-
/// Pseudo AMOs
class PseudoAMO : Pseudo<(outs GPR:$res, GPR:$scratch),
@@ -338,19 +327,6 @@ defm : AMOPat<"atomic_load_min_64", "AMOMIN_D", i64>;
defm : AMOPat<"atomic_load_umax_64", "AMOMAXU_D", i64>;
defm : AMOPat<"atomic_load_umin_64", "AMOMINU_D", i64>;
-/// 64-bit AMOs
-
-def : Pat<(i64 (atomic_load_sub_64_monotonic GPR:$addr, GPR:$incr)),
- (AMOADD_D GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acquire GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_release GPR:$addr, GPR:$incr)),
- (AMOADD_D_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_acq_rel GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-def : Pat<(i64 (atomic_load_sub_64_seq_cst GPR:$addr, GPR:$incr)),
- (AMOADD_D_AQ_RL GPR:$addr, (SUB (XLenVT X0), GPR:$incr))>;
-
/// 64-bit pseudo AMOs
let Size = 20 in
diff --git a/llvm/test/CodeGen/Mips/atomicops.ll b/llvm/test/CodeGen/Mips/atomicops.ll
index a67b6206c37ebc0..14e401e1f09632b 100644
--- a/llvm/test/CodeGen/Mips/atomicops.ll
+++ b/llvm/test/CodeGen/Mips/atomicops.ll
@@ -12,6 +12,15 @@ entry:
; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_add_4)(${{[0-9]+}})
}
+define i32 @atomic_load_sub(ptr %mem, i32 %val, i32 %c) nounwind {
+; 16-LABEL: atomic_load_sub:
+; 16: lw ${{[0-9]+}}, %call16(__sync_synchronize)(${{[0-9]+}})
+; 16: lw ${{[0-9]+}}, %call16(__sync_fetch_and_sub_4)(${{[0-9]+}})
+entry:
+ %0 = atomicrmw sub ptr %mem, i32 %val seq_cst
+ ret i32 %0
+}
+
define i32 @main() nounwind {
entry:
%x = alloca i32, align 4
@@ -37,5 +46,3 @@ entry:
}
declare i32 @printf(ptr nocapture, ...) nounwind
-
-
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
new file mode 100644
index 000000000000000..9fcf4c1b0541bd6
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw-sub.ll
@@ -0,0 +1,181 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV32I %s
+; RUN: llc -mtriple=riscv32 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA %s
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefix=RV64I %s
+; RUN: llc -mtriple=riscv64 -mattr=+a -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA %s
+
+define i32 @atomicrmw_sub_i32_constant(ptr %a) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_constant:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_constant:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: li a1, -1
+; RV32IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_constant:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_constant:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a1, -1
+; RV64IA-NEXT: amoadd.w.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i32 1 seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_constant(ptr %a) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i64_constant:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: li a1, 1
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: li a2, 0
+; RV32I-NEXT: call __atomic_fetch_sub_8@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_constant:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: li a1, 1
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: li a2, 0
+; RV32IA-NEXT: call __atomic_fetch_sub_8@plt
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64_constant:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: li a1, 1
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_8@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64_constant:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: li a1, -1
+; RV64IA-NEXT: amoadd.d.aqrl a0, a1, (a0)
+; RV64IA-NEXT: ret
+ %1 = atomicrmw sub ptr %a, i64 1 seq_cst
+ ret i64 %1
+}
+
+define i32 @atomicrmw_sub_i32_neg(ptr %a, i32 %x, i32 %y) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i32_neg:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sub a1, a1, a2
+; RV32I-NEXT: li a2, 5
+; RV32I-NEXT: call __atomic_fetch_sub_4@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i32_neg:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: sub a2, a2, a1
+; RV32IA-NEXT: amoadd.w.aqrl a0, a2, (a0)
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i32_neg:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: subw a1, a1, a2
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_4@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i32_neg:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: sub a2, a2, a1
+; RV64IA-NEXT: amoadd.w.aqrl a0, a2, (a0)
+; RV64IA-NEXT: ret
+ %b = sub i32 %x, %y
+ %1 = atomicrmw sub ptr %a, i32 %b seq_cst
+ ret i32 %1
+}
+
+define i64 @atomicrmw_sub_i64_neg(ptr %a, i64 %x, i64 %y) nounwind {
+; RV32I-LABEL: atomicrmw_sub_i64_neg:
+; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT: sltu a5, a1, a3
+; RV32I-NEXT: sub a2, a2, a4
+; RV32I-NEXT: sub a2, a2, a5
+; RV32I-NEXT: sub a1, a1, a3
+; RV32I-NEXT: li a3, 5
+; RV32I-NEXT: call __atomic_fetch_sub_8@plt
+; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: ret
+;
+; RV32IA-LABEL: atomicrmw_sub_i64_neg:
+; RV32IA: # %bb.0:
+; RV32IA-NEXT: addi sp, sp, -16
+; RV32IA-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
+; RV32IA-NEXT: sltu a5, a1, a3
+; RV32IA-NEXT: sub a2, a2, a4
+; RV32IA-NEXT: sub a2, a2, a5
+; RV32IA-NEXT: sub a1, a1, a3
+; RV32IA-NEXT: li a3, 5
+; RV32IA-NEXT: call __atomic_fetch_sub_8@plt
+; RV32IA-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
+; RV32IA-NEXT: addi sp, sp, 16
+; RV32IA-NEXT: ret
+;
+; RV64I-LABEL: atomicrmw_sub_i64_neg:
+; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -16
+; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT: sub a1, a1, a2
+; RV64I-NEXT: li a2, 5
+; RV64I-NEXT: call __atomic_fetch_sub_8@plt
+; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT: addi sp, sp, 16
+; RV64I-NEXT: ret
+;
+; RV64IA-LABEL: atomicrmw_sub_i64_neg:
+; RV64IA: # %bb.0:
+; RV64IA-NEXT: sub a2, a2, a1
+; RV64IA-NEXT: amoadd.d.aqrl a0, a2, (a0)
+; RV64IA-NEXT: ret
+ %b = sub i64 %x, %y
+ %1 = atomicrmw sub ptr %a, i64 %b seq_cst
+ ret i64 %1
+}