[llvm] f1f5833 - [AArch64] Codegen for FEAT_LSE128
Tomas Matheson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 04:02:44 PST 2023
Author: Tomas Matheson
Date: 2023-01-25T12:02:00Z
New Revision: f1f583347d00aad378eb0128e72d3d2e8be5174b
URL: https://github.com/llvm/llvm-project/commit/f1f583347d00aad378eb0128e72d3d2e8be5174b
DIFF: https://github.com/llvm/llvm-project/commit/f1f583347d00aad378eb0128e72d3d2e8be5174b.diff
LOG: [AArch64] Codegen for FEAT_LSE128
Codegen support for 128-bit atomicrmw (and|or|xchg).
- store atomic -> swpp
- atomicrmw xchg -> swpp
- atomicrmw and -> ldclrp
- atomicrmw or -> ldsetp
Reviewed By: lenary
Differential Revision: https://reviews.llvm.org/D141406
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6f2058c721577..c1c34c62ce2aa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -821,6 +821,14 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
#undef LCALLNAME5
}
+ if (Subtarget->hasLSE128()) {
+ // Custom lowering because i128 is not legal. Must be replaced by 2x64
+ // values. ATOMIC_LOAD_AND also needs op legalisation to emit LDCLRP.
+ setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i128, Custom);
+ setOperationAction(ISD::ATOMIC_SWAP, MVT::i128, Custom);
+ }
+
// 128-bit loads and stores can be done without expanding
setOperationAction(ISD::LOAD, MVT::i128, Custom);
setOperationAction(ISD::STORE, MVT::i128, Custom);
@@ -13301,12 +13309,14 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
SelectionDAG &DAG) const {
auto &Subtarget = DAG.getSubtarget<AArch64Subtarget>();
+ // No point replacing if we don't have the relevant instruction/libcall anyway
if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
return SDValue();
// LSE has an atomic load-clear instruction, but not a load-and.
SDLoc dl(Op);
MVT VT = Op.getSimpleValueType();
+ assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
SDValue RHS = Op.getOperand(2);
AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
@@ -22082,6 +22092,137 @@ static void ReplaceCMP_SWAP_128Results(SDNode *N,
Results.push_back(SDValue(CmpSwap, 3));
}
+static unsigned getAtomicLoad128Opcode(unsigned ISDOpcode,
+ AtomicOrdering Ordering) {
+ // ATOMIC_LOAD_CLR only appears when lowering ATOMIC_LOAD_AND (see
+ // LowerATOMIC_LOAD_AND). We can't take that approach with 128-bit, because
+ // the type is not legal. Therefore we shouldn't expect to see a 128-bit
+ // ATOMIC_LOAD_CLR at any point.
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_CLR &&
+ "ATOMIC_LOAD_AND should be lowered to LDCLRP directly");
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_ADD && "There is no 128 bit LDADD");
+ assert(ISDOpcode != ISD::ATOMIC_LOAD_SUB && "There is no 128 bit LDSUB");
+
+ if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
+ // The operand will need to be XORed in a separate step.
+ switch (Ordering) {
+ case AtomicOrdering::Monotonic:
+ return AArch64::LDCLRP;
+ break;
+ case AtomicOrdering::Acquire:
+ return AArch64::LDCLRPA;
+ break;
+ case AtomicOrdering::Release:
+ return AArch64::LDCLRPL;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return AArch64::LDCLRPAL;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+ }
+
+ if (ISDOpcode == ISD::ATOMIC_LOAD_OR) {
+ switch (Ordering) {
+ case AtomicOrdering::Monotonic:
+ return AArch64::LDSETP;
+ break;
+ case AtomicOrdering::Acquire:
+ return AArch64::LDSETPA;
+ break;
+ case AtomicOrdering::Release:
+ return AArch64::LDSETPL;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return AArch64::LDSETPAL;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+ }
+
+ if (ISDOpcode == ISD::ATOMIC_SWAP) {
+ switch (Ordering) {
+ case AtomicOrdering::Monotonic:
+ return AArch64::SWPP;
+ break;
+ case AtomicOrdering::Acquire:
+ return AArch64::SWPPA;
+ break;
+ case AtomicOrdering::Release:
+ return AArch64::SWPPL;
+ break;
+ case AtomicOrdering::AcquireRelease:
+ case AtomicOrdering::SequentiallyConsistent:
+ return AArch64::SWPPAL;
+ break;
+ default:
+ llvm_unreachable("Unexpected ordering!");
+ }
+ }
+
+ llvm_unreachable("Unexpected ISDOpcode!");
+}
+
+static void ReplaceATOMIC_LOAD_128Results(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
+  // LSE128 has 128-bit RMW ops, but i128 is not a legal type, so lower it
+ // here. This follows the approach of the CMP_SWAP_XXX pseudo instructions
+ // rather than the CASP instructions, because CASP has register classes for
+ // the pairs of registers and therefore uses REG_SEQUENCE and EXTRACT_SUBREG
+ // to present them as single operands. LSE128 instructions use the GPR64
+ // register class (because the pair does not have to be sequential), like
+ // CMP_SWAP_XXX, and therefore we use TRUNCATE and BUILD_PAIR.
+
+ assert(N->getValueType(0) == MVT::i128 &&
+ "AtomicLoadXXX on types less than 128 should be legal");
+
+ if (!Subtarget->hasLSE128())
+ return;
+
+ MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
+ const SDValue &Chain = N->getOperand(0);
+ const SDValue &Ptr = N->getOperand(1);
+ const SDValue &Val128 = N->getOperand(2);
+ std::pair<SDValue, SDValue> Val2x64 = splitInt128(Val128, DAG);
+
+ const unsigned ISDOpcode = N->getOpcode();
+ const unsigned MachineOpcode =
+ getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
+
+ if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
+ SDLoc dl(Val128);
+ Val2x64.first =
+ DAG.getNode(ISD::XOR, dl, MVT::i64,
+ DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.first);
+ Val2x64.second =
+ DAG.getNode(ISD::XOR, dl, MVT::i64,
+ DAG.getConstant(-1ULL, dl, MVT::i64), Val2x64.second);
+ }
+
+ SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Ops[0], Ops[1]);
+
+ MachineSDNode *AtomicInst =
+ DAG.getMachineNode(MachineOpcode, SDLoc(N),
+ DAG.getVTList(MVT::i64, MVT::i64, MVT::Other), Ops);
+
+ DAG.setNodeMemRefs(AtomicInst, {MemOp});
+
+ SDValue Lo = SDValue(AtomicInst, 0), Hi = SDValue(AtomicInst, 1);
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
+ Results.push_back(SDValue(AtomicInst, 2)); // Chain out
+}
+
void AArch64TargetLowering::ReplaceNodeResults(
SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
switch (N->getOpcode()) {
@@ -22135,6 +22276,20 @@ void AArch64TargetLowering::ReplaceNodeResults(
case ISD::ATOMIC_CMP_SWAP:
ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
return;
+ case ISD::ATOMIC_LOAD_CLR:
+ assert(N->getValueType(0) != MVT::i128 &&
+ "128-bit ATOMIC_LOAD_AND should be lowered directly to LDCLRP");
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_SWAP: {
+ AtomicSDNode *AN = cast<AtomicSDNode>(N);
+ assert(AN->getVal().getValueType() == MVT::i128 &&
+ "Expected 128-bit atomicrmw.");
+ // These need custom type legalisation so we go directly to instruction.
+ ReplaceATOMIC_LOAD_128Results(N, Results, DAG, Subtarget);
+ return;
+ }
case ISD::ATOMIC_LOAD:
case ISD::LOAD: {
MemSDNode *LoadNode = cast<MemSDNode>(N);
@@ -22368,6 +22523,13 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
if (Size > 128) return AtomicExpansionKind::None;
+ bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
+ (AI->getOperation() == AtomicRMWInst::Xchg ||
+ AI->getOperation() == AtomicRMWInst::Or ||
+ AI->getOperation() == AtomicRMWInst::And);
+ if (CanUseLSE128)
+ return AtomicExpansionKind::None;
+
// Nand is not supported in LSE.
// Leave 128 bits to LLSC or CmpXChg.
if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll
index e8437d86ae336..9ba794b674868 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-lse128.ll
@@ -116,69 +116,29 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
}
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_unordered:
-; -O1: ldp x4, x5, [x2]
-; -O1: casp x6, x7, x0, x1, [x2]
-; -O1: cmp x7, x5
-; -O1: ccmp x6, x4, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK: swpp x0, x1, [x2]
store atomic i128 %value, ptr %ptr unordered, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x2]
-; -O1: casp x6, x7, x0, x1, [x2]
-; -O1: cmp x7, x5
-; -O1: ccmp x6, x4, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK: swpp x0, x1, [x2]
store atomic i128 %value, ptr %ptr monotonic, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_release:
-; -O1: ldp x4, x5, [x2]
-; -O1: caspl x6, x7, x0, x1, [x2]
-; -O1: cmp x7, x5
-; -O1: ccmp x6, x4, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK: swppl x0, x1, [x2]
store atomic i128 %value, ptr %ptr release, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x2]
-; -O1: caspal x6, x7, x0, x1, [x2]
-; -O1: cmp x7, x5
-; -O1: ccmp x6, x4, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK: swppal x0, x1, [x2]
store atomic i128 %value, ptr %ptr seq_cst, align 16
ret void
}
@@ -322,3 +282,6 @@ define dso_local void @store_atomic_i128_unaligned_seq_cst(i128 %value, ptr %ptr
store atomic i128 %value, ptr %ptr seq_cst, align 1
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll
index 27aedd35c5d02..343ea36aeb73b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse128.ll
@@ -145,85 +145,50 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: swpp x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x0]
-; -O1: casp x4, x5, x2, x3, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: swpp x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire:
-; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: swppa x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspa x4, x5, x2, x3, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: swppa x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_release:
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: swppl x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspl x4, x5, x2, x3, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: swppl x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value release, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: swppal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspal x4, x5, x2, x3, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: swppal x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: swppal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspal x4, x5, x2, x3, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: swppal x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -1575,105 +1540,70 @@ define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_monotonic:
-; -O0: and x2, x9, x11
-; -O0: and x9, x9, x10
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrp x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: casp x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrp x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_acquire:
-; -O0: and x2, x9, x11
-; -O0: and x9, x9, x10
-; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpa x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspa x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpa x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_release:
-; -O0: and x2, x9, x11
-; -O0: and x9, x9, x10
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpl x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_release:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspl x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpl x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value release, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_acq_rel:
-; -O0: and x2, x9, x11
-; -O0: and x9, x9, x10
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpal x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_seq_cst:
-; -O0: and x2, x9, x11
-; -O0: and x9, x9, x10
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpal x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -2955,105 +2885,50 @@ define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_monotonic:
-; -O0: orr x2, x9, x11
-; -O0: orr x9, x9, x10
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: ldsetp x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: casp x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: ldsetp x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_acquire:
-; -O0: orr x2, x9, x11
-; -O0: orr x9, x9, x10
-; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: ldsetpa x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspa x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: ldsetpa x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_release:
-; -O0: orr x2, x9, x11
-; -O0: orr x9, x9, x10
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: ldsetpl x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_release:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspl x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: ldsetpl x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value release, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_acq_rel:
-; -O0: orr x2, x9, x11
-; -O0: orr x9, x9, x10
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: ldsetpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: ldsetpal x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_seq_cst:
-; -O0: orr x2, x9, x11
-; -O0: orr x9, x9, x10
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
+; -O0: ldsetpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x5, x7
-; -O1: ccmp x4, x6, #0, eq
+; -O1: ldsetpal x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll
index 913abeb20f9bc..587f04baf11dc 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-lse128.ll
@@ -116,61 +116,29 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
}
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_unordered:
-; -O1: ldp x4, x5, [x2]
-; -O1: casp x6, x7, x0, x1, [x2]
-; -O1: cmp x6, x4
-; -O1: ccmp x7, x5, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK: swpp x0, x1, [x2]
store atomic i128 %value, ptr %ptr unordered, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x2]
-; -O1: casp x6, x7, x0, x1, [x2]
-; -O1: cmp x6, x4
-; -O1: ccmp x7, x5, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK: swpp x0, x1, [x2]
store atomic i128 %value, ptr %ptr monotonic, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_release:
-; -O1: ldp x4, x5, [x2]
-; -O1: caspl x6, x7, x0, x1, [x2]
-; -O1: cmp x6, x4
-; -O1: ccmp x7, x5, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK: swppl x0, x1, [x2]
store atomic i128 %value, ptr %ptr release, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x2]
-; -O1: caspal x6, x7, x0, x1, [x2]
-; -O1: cmp x6, x4
-; -O1: ccmp x7, x5, #0, eq
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK: swppal x0, x1, [x2]
store atomic i128 %value, ptr %ptr seq_cst, align 16
ret void
}
@@ -314,3 +282,6 @@ define dso_local void @store_atomic_i128_unaligned_seq_cst(i128 %value, ptr %ptr
store atomic i128 %value, ptr %ptr seq_cst, align 1
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll
index 9d4222ec6ade6..18c43124f3236 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse128.ll
@@ -145,75 +145,50 @@ define dso_local i64 @atomicrmw_xchg_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_xchg_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: swpp x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x0]
-; -O1: casp x4, x5, x2, x3, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: swpp x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acquire:
-; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: swppa x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acquire:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspa x4, x5, x2, x3, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: swppa x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_release:
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: swppl x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_release:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspl x4, x5, x2, x3, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: swppl x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value release, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: swppal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_acq_rel:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspal x4, x5, x2, x3, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: swppal x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_xchg_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: swppal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_xchg_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x0]
-; -O1: caspal x4, x5, x2, x3, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: swppal x2, x1, [x0]
%r = atomicrmw xchg ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -1665,95 +1640,70 @@ define dso_local i64 @atomicrmw_and_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_and_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_monotonic:
-; -O0: and x2, x11, x12
-; -O0: and x9, x10, x9
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrp x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: casp x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrp x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_acquire:
-; -O0: and x2, x11, x12
-; -O0: and x9, x10, x9
-; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpa x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspa x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpa x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_release:
-; -O0: and x2, x11, x12
-; -O0: and x9, x10, x9
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpl x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_release:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspl x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpl x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value release, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_acq_rel:
-; -O0: and x2, x11, x12
-; -O0: and x9, x10, x9
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpal x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_and_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_and_i128_aligned_seq_cst:
-; -O0: and x2, x11, x12
-; -O0: and x9, x10, x9
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: mvn x1, x3
+; -O0: mvn x0, x2
+; -O0: ldclrpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x0]
-; -O1: and x8, x4, x2
-; -O1: and x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: mvn x1, x3
+; -O1: mvn x8, x2
+; -O1: ldclrpal x8, x1, [x0]
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -3040,95 +2990,50 @@ define dso_local i64 @atomicrmw_or_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_or_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_monotonic:
-; -O0: orr x2, x11, x12
-; -O0: orr x9, x10, x9
-; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: ldsetp x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: casp x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: ldsetp x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_acquire:
-; -O0: orr x2, x11, x12
-; -O0: orr x9, x10, x9
-; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: ldsetpa x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspa x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: ldsetpa x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_release:
-; -O0: orr x2, x11, x12
-; -O0: orr x9, x10, x9
-; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: ldsetpl x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_release:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspl x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: ldsetpl x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value release, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_acq_rel:
-; -O0: orr x2, x11, x12
-; -O0: orr x9, x10, x9
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: ldsetpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: ldsetpal x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
define dso_local i128 @atomicrmw_or_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_or_i128_aligned_seq_cst:
-; -O0: orr x2, x11, x12
-; -O0: orr x9, x10, x9
-; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x9, x11
-; -O0: ccmp x8, x10, #0, eq
+; -O0: ldsetpal x0, x1, [x8]
;
; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
-; -O1: ldp x4, x5, [x0]
-; -O1: orr x8, x4, x2
-; -O1: orr x9, x7, x3
-; -O1: caspal x4, x5, x8, x9, [x0]
-; -O1: cmp x4, x6
-; -O1: ccmp x5, x7, #0, eq
+; -O1: ldsetpal x2, x1, [x0]
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
More information about the llvm-commits
mailing list