[llvm] 7c84f94 - [AArch64] Codegen for FEAT_LRCPC3
Tomas Matheson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 25 04:28:31 PST 2023
Author: Tomas Matheson
Date: 2023-01-25T12:27:23Z
New Revision: 7c84f94eb9f9c39229ca26d21d3bcb0b35efdc71
URL: https://github.com/llvm/llvm-project/commit/7c84f94eb9f9c39229ca26d21d3bcb0b35efdc71
DIFF: https://github.com/llvm/llvm-project/commit/7c84f94eb9f9c39229ca26d21d3bcb0b35efdc71.diff
LOG: [AArch64] Codegen for FEAT_LRCPC3
Implements support for the following 128-bit atomic operations with +rcpc3:
- 128-bit store-release -> STILP
- 128-bit load-acquire -> LDIAPP
D126250 and D137590 added support for emitting LDAPR (Load-Acquire RCpc) rather
than LDAR (Load-Acquire) when +rcpc is available. This patch allows emitting
the 128-bit RCpc instructions added in FEAT_LRCPC3 (LDIAPP/STILP). The
implementation differs from the LDAPR handling because there are no non-RCpc
equivalents of these new instructions.
Support for the offset variants will be added in D141431.
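For reference, a minimal IR sketch of the two cases this patch targets. The
function names below are illustrative (not from the patch); the expected
instructions are copied from the little-endian tests added below, compiled
with -mattr=+lse2,+rcpc3 at -O1:

; 128-bit load-acquire: selected to LDIAPP
define i128 @load_acquire_i128(ptr %ptr) {
  %r = load atomic i128, ptr %ptr acquire, align 16    ; -> ldiapp x0, x1, [x0]
  ret i128 %r
}

; 128-bit store-release: selected to STILP
define void @store_release_i128(i128 %value, ptr %ptr) {
  store atomic i128 %value, ptr %ptr release, align 16 ; -> stilp x0, x1, [x2]
  ret void
}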
Reviewed By: lenary
Differential Revision: https://reviews.llvm.org/D141429
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c1c34c62ce2aa..46db20ecdd742 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -834,7 +834,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::STORE, MVT::i128, Custom);
// Aligned 128-bit loads and stores are single-copy atomic according to the
- // v8.4a spec.
+ // v8.4a spec. LRCPC3 introduces 128-bit STILP/LDIAPP but still requires LSE2.
if (Subtarget->hasLSE2()) {
setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
@@ -2572,8 +2572,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::SSTNT1_PRED)
MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
MAKE_CASE(AArch64ISD::LDP)
+ MAKE_CASE(AArch64ISD::LDIAPP)
MAKE_CASE(AArch64ISD::LDNP)
MAKE_CASE(AArch64ISD::STP)
+ MAKE_CASE(AArch64ISD::STILP)
MAKE_CASE(AArch64ISD::STNP)
MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
@@ -5698,9 +5700,14 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
MemSDNode *StoreNode = cast<MemSDNode>(Op);
assert(StoreNode->getMemoryVT() == MVT::i128);
assert(StoreNode->isVolatile() || StoreNode->isAtomic());
- assert(!StoreNode->isAtomic() ||
- StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
- StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
+
+ bool IsStoreRelease =
+ StoreNode->getMergedOrdering() == AtomicOrdering::Release;
+ if (StoreNode->isAtomic())
+ assert((Subtarget->hasFeature(AArch64::FeatureLSE2) &&
+ Subtarget->hasFeature(AArch64::FeatureRCPC3) && IsStoreRelease) ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Unordered ||
+ StoreNode->getMergedOrdering() == AtomicOrdering::Monotonic);
SDValue Value = StoreNode->getOpcode() == ISD::STORE
? StoreNode->getOperand(1)
@@ -5710,8 +5717,10 @@ SDValue AArch64TargetLowering::LowerStore128(SDValue Op,
DAG.getConstant(0, DL, MVT::i64));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, Value,
DAG.getConstant(1, DL, MVT::i64));
+
+ unsigned Opcode = IsStoreRelease ? AArch64ISD::STILP : AArch64ISD::STP;
SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::STP, DL, DAG.getVTList(MVT::Other),
+ Opcode, DL, DAG.getVTList(MVT::Other),
{StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
StoreNode->getMemoryVT(), StoreNode->getMemOperand());
return Result;
@@ -5984,7 +5993,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ATOMIC_STORE:
if (cast<MemSDNode>(Op)->getMemoryVT() == MVT::i128) {
- assert(Subtarget->hasLSE2());
+ assert(Subtarget->hasLSE2() || Subtarget->hasRCPC3());
return LowerStore128(Op, DAG);
}
return SDValue();
@@ -22325,9 +22334,16 @@ void AArch64TargetLowering::ReplaceNodeResults(
}
if (SDValue(N, 0).getValueType() == MVT::i128) {
+ auto *AN = dyn_cast<AtomicSDNode>(LoadNode);
+ bool isLoadAcquire =
+ AN && AN->getSuccessOrdering() == AtomicOrdering::Acquire;
+ unsigned Opcode = isLoadAcquire ? AArch64ISD::LDIAPP : AArch64ISD::LDP;
+
+ if (isLoadAcquire)
+ assert(Subtarget->hasFeature(AArch64::FeatureRCPC3));
+
SDValue Result = DAG.getMemIntrinsicNode(
- AArch64ISD::LDP, SDLoc(N),
- DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
+ Opcode, SDLoc(N), DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
{LoadNode->getChain(), LoadNode->getBasePtr()},
LoadNode->getMemoryVT(), LoadNode->getMemOperand());
@@ -22450,8 +22466,27 @@ bool AArch64TargetLowering::isOpSuitableForLDPSTP(const Instruction *I) const {
return false;
}
+bool AArch64TargetLowering::isOpSuitableForRCPC3(const Instruction *I) const {
+ if (!Subtarget->hasLSE2() || !Subtarget->hasRCPC3())
+ return false;
+
+ if (auto LI = dyn_cast<LoadInst>(I))
+ return LI->getType()->getPrimitiveSizeInBits() == 128 &&
+ LI->getAlign() >= Align(16) &&
+ LI->getOrdering() == AtomicOrdering::Acquire;
+
+ if (auto SI = dyn_cast<StoreInst>(I))
+ return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 128 &&
+ SI->getAlign() >= Align(16) &&
+ SI->getOrdering() == AtomicOrdering::Release;
+
+ return false;
+}
+
bool AArch64TargetLowering::shouldInsertFencesForAtomic(
const Instruction *I) const {
+ if (isOpSuitableForRCPC3(I))
+ return false;
return isOpSuitableForLDPSTP(I);
}
@@ -22485,7 +22520,7 @@ bool AArch64TargetLowering::shouldInsertTrailingFenceForAtomicStore(
TargetLoweringBase::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
- if (Size != 128 || isOpSuitableForLDPSTP(SI))
+ if (Size != 128 || isOpSuitableForLDPSTP(SI) || isOpSuitableForRCPC3(SI))
return AtomicExpansionKind::None;
return AtomicExpansionKind::Expand;
}
@@ -22497,7 +22532,7 @@ TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
unsigned Size = LI->getType()->getPrimitiveSizeInBits();
- if (Size != 128 || isOpSuitableForLDPSTP(LI))
+ if (Size != 128 || isOpSuitableForLDPSTP(LI) || isOpSuitableForRCPC3(LI))
return AtomicExpansionKind::None;
// At -O0, fast-regalloc cannot cope with the live vregs necessary to
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 0edec721ed879..92619f7e4c5a3 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -475,8 +475,10 @@ enum NodeType : unsigned {
STZ2G,
LDP,
+ LDIAPP,
LDNP,
STP,
+ STILP,
STNP,
// Memory Operations
@@ -705,6 +707,7 @@ class AArch64TargetLowering : public TargetLowering {
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override;
bool isOpSuitableForLDPSTP(const Instruction *I) const;
+ bool isOpSuitableForRCPC3(const Instruction *I) const;
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
bool
shouldInsertTrailingFenceForAtomicStore(const Instruction *I) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 17fc90afcaab6..5754546627152 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -362,8 +362,10 @@ def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;
def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Generates the general dynamic sequences, i.e.
@@ -786,8 +788,10 @@ def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
@@ -3408,7 +3412,7 @@ def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
-// Pair (pre-indexed)
+// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
@@ -8684,6 +8688,9 @@ let Predicates = [HasRCPC3] in {
def LDIAPPW: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
def LDIAPPX: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
+ def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
+ def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;
+
// Aliases for when offset=0
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32: $Rt, GPR32: $Rt2, GPR64sp:$Rn)>;
def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64: $Rt, GPR64: $Rt2, GPR64sp:$Rn)>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 186d0ed35d56f..7e5fd1a5f6bdf 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -79,6 +79,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
const LLT &MinFPScalar = HasFP16 ? s16 : s32;
const bool HasCSSC = ST.hasCSSC();
+ const bool HasRCPC3 = ST.hasRCPC3();
getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
.legalFor({p0, s8, s16, s32, s64})
@@ -309,6 +310,10 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
};
getActionDefinitionsBuilder(G_LOAD)
+ .customIf([=](const LegalityQuery &Query) {
+ return HasRCPC3 && Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering == AtomicOrdering::Acquire;
+ })
.customIf([=](const LegalityQuery &Query) {
return Query.Types[0] == s128 &&
Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
@@ -328,16 +333,17 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{v2s64, p0, s128, 8}})
// These extends are also legal
.legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 8}})
- .widenScalarToNextPow2(0, /* MinSize = */8)
+ .widenScalarToNextPow2(0, /* MinSize = */ 8)
.lowerIfMemSizeNotByteSizePow2()
.clampScalar(0, s8, s64)
- .narrowScalarIf([=](const LegalityQuery &Query) {
- // Clamp extending load results to 32-bits.
- return Query.Types[0].isScalar() &&
- Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
- Query.Types[0].getSizeInBits() > 32;
- },
- changeTo(0, s32))
+ .narrowScalarIf(
+ [=](const LegalityQuery &Query) {
+ // Clamp extending load results to 32-bits.
+ return Query.Types[0].isScalar() &&
+ Query.Types[0] != Query.MMODescrs[0].MemoryTy &&
+ Query.Types[0].getSizeInBits() > 32;
+ },
+ changeTo(0, s32))
.clampMaxNumElements(0, s8, 16)
.clampMaxNumElements(0, s16, 8)
.clampMaxNumElements(0, s32, 4)
@@ -347,31 +353,25 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
.scalarizeIf(typeIs(0, v2s16), 0);
getActionDefinitionsBuilder(G_STORE)
+ .customIf([=](const LegalityQuery &Query) {
+ return HasRCPC3 && Query.Types[0] == s128 &&
+ Query.MMODescrs[0].Ordering == AtomicOrdering::Release;
+ })
.customIf([=](const LegalityQuery &Query) {
return Query.Types[0] == s128 &&
Query.MMODescrs[0].Ordering != AtomicOrdering::NotAtomic;
})
- .legalForTypesWithMemDesc({{s8, p0, s8, 8},
- {s16, p0, s8, 8}, // truncstorei8 from s16
- {s32, p0, s8, 8}, // truncstorei8 from s32
- {s64, p0, s8, 8}, // truncstorei8 from s64
- {s16, p0, s16, 8},
- {s32, p0, s16, 8}, // truncstorei16 from s32
- {s64, p0, s16, 8}, // truncstorei16 from s64
- {s32, p0, s8, 8},
- {s32, p0, s16, 8},
- {s32, p0, s32, 8},
- {s64, p0, s64, 8},
- {s64, p0, s32, 8}, // truncstorei32 from s64
- {p0, p0, s64, 8},
- {s128, p0, s128, 8},
- {v16s8, p0, s128, 8},
- {v8s8, p0, s64, 8},
- {v4s16, p0, s64, 8},
- {v8s16, p0, s128, 8},
- {v2s32, p0, s64, 8},
- {v4s32, p0, s128, 8},
- {v2s64, p0, s128, 8}})
+ .legalForTypesWithMemDesc(
+ {{s8, p0, s8, 8}, {s16, p0, s8, 8}, // truncstorei8 from s16
+ {s32, p0, s8, 8}, // truncstorei8 from s32
+ {s64, p0, s8, 8}, // truncstorei8 from s64
+ {s16, p0, s16, 8}, {s32, p0, s16, 8}, // truncstorei16 from s32
+ {s64, p0, s16, 8}, // truncstorei16 from s64
+ {s32, p0, s8, 8}, {s32, p0, s16, 8}, {s32, p0, s32, 8},
+ {s64, p0, s64, 8}, {s64, p0, s32, 8}, // truncstorei32 from s64
+ {p0, p0, s64, 8}, {s128, p0, s128, 8}, {v16s8, p0, s128, 8},
+ {v8s8, p0, s64, 8}, {v4s16, p0, s64, 8}, {v8s16, p0, s128, 8},
+ {v2s32, p0, s64, 8}, {v4s32, p0, s128, 8}, {v2s64, p0, s128, 8}})
.clampScalar(0, s8, s64)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
@@ -1188,27 +1188,49 @@ bool AArch64LegalizerInfo::legalizeLoadStore(
const LLT ValTy = MRI.getType(ValReg);
if (ValTy == LLT::scalar(128)) {
- assert((*MI.memoperands_begin())->getSuccessOrdering() ==
- AtomicOrdering::Monotonic ||
- (*MI.memoperands_begin())->getSuccessOrdering() ==
- AtomicOrdering::Unordered);
- assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+
+ AtomicOrdering Ordering = (*MI.memoperands_begin())->getSuccessOrdering();
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ bool IsLoadAcquire = IsLoad && Ordering == AtomicOrdering::Acquire;
+ bool IsStoreRelease = !IsLoad && Ordering == AtomicOrdering::Release;
+ bool IsRcpC3 =
+ ST->hasLSE2() && ST->hasRCPC3() && (IsLoadAcquire || IsStoreRelease);
+
LLT s64 = LLT::scalar(64);
+
+ unsigned Opcode;
+ if (IsRcpC3) {
+ Opcode = IsLoad ? AArch64::LDIAPPX : AArch64::STILPX;
+ } else {
+ // For LSE2, loads/stores should have been converted to monotonic and had
+ // a fence inserted after them.
+ assert(Ordering == AtomicOrdering::Monotonic ||
+ Ordering == AtomicOrdering::Unordered);
+ assert(ST->hasLSE2() && "ldp/stp not single copy atomic without +lse2");
+
+ Opcode = IsLoad ? AArch64::LDPXi : AArch64::STPXi;
+ }
+
MachineInstrBuilder NewI;
- if (MI.getOpcode() == TargetOpcode::G_LOAD) {
- NewI = MIRBuilder.buildInstr(AArch64::LDPXi, {s64, s64}, {});
+ if (IsLoad) {
+ NewI = MIRBuilder.buildInstr(Opcode, {s64, s64}, {});
MIRBuilder.buildMergeLikeInstr(
ValReg, {NewI->getOperand(0), NewI->getOperand(1)});
} else {
auto Split = MIRBuilder.buildUnmerge(s64, MI.getOperand(0));
NewI = MIRBuilder.buildInstr(
- AArch64::STPXi, {}, {Split->getOperand(0), Split->getOperand(1)});
+ Opcode, {}, {Split->getOperand(0), Split->getOperand(1)});
+ }
+
+ if (IsRcpC3) {
+ NewI.addUse(MI.getOperand(1).getReg());
+ } else {
+ Register Base;
+ int Offset;
+ matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
+ NewI.addUse(Base);
+ NewI.addImm(Offset / 8);
}
- Register Base;
- int Offset;
- matchLDPSTPAddrMode(MI.getOperand(1).getReg(), Base, Offset, MRI);
- NewI.addUse(Base);
- NewI.addImm(Offset / 8);
NewI.cloneMemRefs(MI);
constrainSelectedInstRegOperands(*NewI, *ST->getInstrInfo(),
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
index df73e52c0c1d9..435f0a0c01765 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-load-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
; CHECK-LABEL: load_atomic_i8_aligned_unordered:
@@ -228,121 +228,59 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
}
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered:
-; -O1: ldxp x0, x1, [x8]
-; -O1: stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered:
+; CHECK: ldp x0, x1, [x0]
%r = load atomic i128, ptr %ptr unordered, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O1: ldxp x0, x1, [x8]
-; -O1: stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
+; CHECK: ldp x0, x1, [x0]
%r = load atomic i128, ptr %ptr unordered, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic:
-; -O1: ldxp x0, x1, [x8]
-; -O1: stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
+; CHECK: ldp x0, x1, [x0]
%r = load atomic i128, ptr %ptr monotonic, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O0: ldxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O1: ldxp x0, x1, [x8]
-; -O1: stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
+; CHECK: ldp x0, x1, [x0]
%r = load atomic i128, ptr %ptr monotonic, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire:
-; -O1: ldaxp x0, x1, [x8]
-; -O1: stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire:
+; CHECK: ldiapp x0, x1, [x0]
%r = load atomic i128, ptr %ptr acquire, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O1: ldaxp x0, x1, [x8]
-; -O1: stxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
+; CHECK: ldiapp x0, x1, [x0]
%r = load atomic i128, ptr %ptr acquire, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O1: ldaxp x0, x1, [x8]
-; -O1: stlxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
+; CHECK: ldp x0, x1, [x0]
+; CHECK: dmb ish
%r = load atomic i128, ptr %ptr seq_cst, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O0: ldaxp x0, x1, [x9]
-; -O0: cmp x0, x10
-; -O0: cmp x1, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x0, x1, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O1: ldaxp x0, x1, [x8]
-; -O1: stlxp w9, x0, x1, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
+; CHECK: ldp x0, x1, [x0]
+; CHECK: dmb ish
%r = load atomic i128, ptr %ptr seq_cst, align 16
ret i128 %r
}
@@ -626,3 +564,6 @@ define dso_local i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %pt
%r = load atomic i128, ptr %ptr seq_cst, align 1
ret i128 %r
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
index cf7d512548b14..8e1ae1d0a754c 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomic-store-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local void @store_atomic_i8_aligned_unordered(i8 %value, ptr %ptr) {
; CHECK-LABEL: store_atomic_i8_aligned_unordered:
@@ -116,77 +116,31 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
}
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_unordered:
-; -O1: ldxp xzr, x8, [x2]
-; -O1: stxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK: stp x0, x1, [x2]
store atomic i128 %value, ptr %ptr unordered, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stxp w8, x14, x15, [x11]
-; -O0: stxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_monotonic:
-; -O1: ldxp xzr, x8, [x2]
-; -O1: stxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK: stp x0, x1, [x2]
store atomic i128 %value, ptr %ptr monotonic, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: ldxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_release:
-; -O1: ldxp xzr, x8, [x2]
-; -O1: stlxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK: stilp x0, x1, [x2]
store atomic i128 %value, ptr %ptr release, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x9, [x11]
-; -O0: cmp x10, x12
-; -O0: cmp x9, x13
-; -O0: stlxp w8, x14, x15, [x11]
-; -O0: stlxp w8, x10, x9, [x11]
-; -O0: eor x8, x10, x8
-; -O0: eor x11, x9, x11
-; -O0: orr x8, x8, x11
-; -O0: subs x8, x8, #0
-;
-; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O1: ldaxp xzr, x8, [x2]
-; -O1: stlxp w8, x0, x1, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK: dmb ish
+; CHECK: stp x0, x1, [x2]
+; CHECK: dmb ish
store atomic i128 %value, ptr %ptr seq_cst, align 16
ret void
}
@@ -330,3 +284,6 @@ define dso_local void @store_atomic_i128_unaligned_seq_cst(i128 %value, ptr %ptr
store atomic i128 %value, ptr %ptr seq_cst, align 1
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 838f4529c4dc7..5004db8d26560 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
index 595bbaba4457a..2d56cf802c4f8 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-cmpxchg-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
index db00aca9110b6..a0928b38b6687 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-fence.ll
@@ -10,8 +10,8 @@
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+outline-atomics -O0 | FileCheck %s --check-prefixes=CHECK,-O0
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+outline-atomics -O1 | FileCheck %s --check-prefixes=CHECK,-O1
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse128 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64 -mattr=+lse128 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
index 517baa6f2f8b6..a09b4c69755d5 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-load-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @load_atomic_i8_aligned_unordered(ptr %ptr) {
; CHECK-LABEL: load_atomic_i8_aligned_unordered:
@@ -228,121 +228,59 @@ define dso_local i64 @load_atomic_i64_aligned_seq_cst_const(ptr readonly %ptr) {
}
define dso_local i128 @load_atomic_i128_aligned_unordered(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered:
-; -O0: ldxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered:
-; -O1: ldxp x1, x0, [x8]
-; -O1: stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered:
+; CHECK: ldp x1, x0, [x0]
%r = load atomic i128, ptr %ptr unordered, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_unordered_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O0: ldxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_unordered_const:
-; -O1: ldxp x1, x0, [x8]
-; -O1: stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_unordered_const:
+; CHECK: ldp x1, x0, [x0]
%r = load atomic i128, ptr %ptr unordered, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_monotonic(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic:
-; -O0: ldxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic:
-; -O1: ldxp x1, x0, [x8]
-; -O1: stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic:
+; CHECK: ldp x1, x0, [x0]
%r = load atomic i128, ptr %ptr monotonic, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_monotonic_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O0: ldxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_monotonic_const:
-; -O1: ldxp x1, x0, [x8]
-; -O1: stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_monotonic_const:
+; CHECK: ldp x1, x0, [x0]
%r = load atomic i128, ptr %ptr monotonic, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_acquire(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire:
-; -O0: ldaxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire:
-; -O1: ldaxp x1, x0, [x8]
-; -O1: stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire:
+; CHECK: ldiapp x1, x0, [x0]
%r = load atomic i128, ptr %ptr acquire, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_acquire_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O0: ldaxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stxp w8, x10, x10, [x9]
-; -O0: stxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_acquire_const:
-; -O1: ldaxp x1, x0, [x8]
-; -O1: stxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_acquire_const:
+; CHECK: ldiapp x1, x0, [x0]
%r = load atomic i128, ptr %ptr acquire, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_seq_cst(ptr %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst:
-; -O1: ldaxp x1, x0, [x8]
-; -O1: stlxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst:
+; CHECK: ldp x1, x0, [x0]
+; CHECK: dmb ish
%r = load atomic i128, ptr %ptr seq_cst, align 16
ret i128 %r
}
define dso_local i128 @load_atomic_i128_aligned_seq_cst_const(ptr readonly %ptr) {
-; -O0-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O0: ldaxp x1, x0, [x9]
-; -O0: cmp x1, x10
-; -O0: cmp x0, x10
-; -O0: stlxp w8, x10, x10, [x9]
-; -O0: stlxp w8, x1, x0, [x9]
-;
-; -O1-LABEL: load_atomic_i128_aligned_seq_cst_const:
-; -O1: ldaxp x1, x0, [x8]
-; -O1: stlxp w9, x1, x0, [x8]
+; CHECK-LABEL: load_atomic_i128_aligned_seq_cst_const:
+; CHECK: ldp x1, x0, [x0]
+; CHECK: dmb ish
%r = load atomic i128, ptr %ptr seq_cst, align 16
ret i128 %r
}
@@ -626,3 +564,6 @@ define dso_local i128 @load_atomic_i128_unaligned_seq_cst_const(ptr readonly %pt
%r = load atomic i128, ptr %ptr seq_cst, align 1
ret i128 %r
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
index 8e067fc74b56b..f8f8fe7cd6fa5 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomic-store-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local void @store_atomic_i8_aligned_unordered(i8 %value, ptr %ptr) {
; CHECK-LABEL: store_atomic_i8_aligned_unordered:
@@ -116,69 +116,31 @@ define dso_local void @store_atomic_i64_aligned_seq_cst(i64 %value, ptr %ptr) {
}
define dso_local void @store_atomic_i128_aligned_unordered(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_unordered:
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_unordered:
-; -O1: ldxp xzr, x8, [x2]
-; -O1: stxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_unordered:
+; CHECK: stp x1, x0, [x2]
store atomic i128 %value, ptr %ptr unordered, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_monotonic(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_monotonic:
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_monotonic:
-; -O1: ldxp xzr, x8, [x2]
-; -O1: stxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_monotonic:
+; CHECK: stp x1, x0, [x2]
store atomic i128 %value, ptr %ptr monotonic, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_release(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_release:
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_release:
-; -O1: ldxp xzr, x8, [x2]
-; -O1: stlxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_release:
+; CHECK: stilp x1, x0, [x2]
store atomic i128 %value, ptr %ptr release, align 16
ret void
}
define dso_local void @store_atomic_i128_aligned_seq_cst(i128 %value, ptr %ptr) {
-; -O0-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
-;
-; -O1-LABEL: store_atomic_i128_aligned_seq_cst:
-; -O1: ldaxp xzr, x8, [x2]
-; -O1: stlxp w8, x1, x0, [x2]
+; CHECK-LABEL: store_atomic_i128_aligned_seq_cst:
+; CHECK: dmb ish
+; CHECK: stp x1, x0, [x2]
+; CHECK: dmb ish
store atomic i128 %value, ptr %ptr seq_cst, align 16
ret void
}
@@ -322,3 +284,6 @@ define dso_local void @store_atomic_i128_unaligned_seq_cst(i128 %value, ptr %ptr
store atomic i128 %value, ptr %ptr seq_cst, align 1
ret void
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; -O0: {{.*}}
+; -O1: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
index ec0300264b06a..9458345fef72e 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @atomicrmw_xchg_i8_aligned_monotonic(ptr %ptr, i8 %value) {
; -O0-LABEL: atomicrmw_xchg_i8_aligned_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
index 804c9c7cb10e9..decae2be2cffd 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-cmpxchg-rcpc3.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "\b(sp)\b" --filter "^\s*(ld[^r]|st[^r]|swp|cas|bl|add|and|eor|orn|orr|sub|mvn|sxt|cmp|ccmp|csel|dmb)"
; The base test file was generated by ./llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
define dso_local i8 @cmpxchg_i8_aligned_monotonic_monotonic(i8 %expected, i8 %new, ptr %ptr) {
; -O0-LABEL: cmpxchg_i8_aligned_monotonic_monotonic:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
index 7a9b5e22e46ae..82c1042c64a5f 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-fence.ll
@@ -10,8 +10,8 @@
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+outline-atomics -O0 | FileCheck %s --check-prefixes=CHECK,-O0
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+outline-atomics -O1 | FileCheck %s --check-prefixes=CHECK,-O1
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
-; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
+; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse2,+rcpc3 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse128 -O0 | FileCheck %s --check-prefixes=CHECK,-O0
; RUN: llc %s -o - -verify-machineinstrs -mtriple=aarch64_be -mattr=+lse128 -O1 | FileCheck %s --check-prefixes=CHECK,-O1
diff --git a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
index 9104dab0cfda2..d0bc0a0435203 100755
--- a/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
+++ b/llvm/test/CodeGen/AArch64/Atomics/generate-tests.py
@@ -105,21 +105,24 @@ def __str__(self) -> str:
class Feature(enum.Flag):
+ # Feature names in filenames are determined by the spelling here:
v8a = enum.auto()
v8_1a = enum.auto() # -mattr=+v8.1a, mandatory FEAT_LOR, FEAT_LSE
rcpc = enum.auto() # FEAT_LRCPC
lse2 = enum.auto() # FEAT_LSE2
outline_atomics = enum.auto() # -moutline-atomics
- rcpc3 = enum.auto() # FEAT_LRCPC3
+ rcpc3 = enum.auto() # FEAT_LSE2 + FEAT_LRCPC3
lse128 = enum.auto() # FEAT_LSE128
@property
def mattr(self):
if self == Feature.outline_atomics:
- return 'outline-atomics'
+ return '+outline-atomics'
if self == Feature.v8_1a:
- return 'v8.1a'
- return self.name
+ return '+v8.1a'
+ if self == Feature.rcpc3:
+ return '+lse2,+rcpc3'
+ return '+' + self.name
ATOMICRMW_OPS = [
@@ -230,7 +233,7 @@ def header(f, triple, features, filter_args: str):
for OptFlag in ['-O0', '-O1']:
f.write(' '.join([
';', 'RUN:', 'llc', '%s', '-o', '-', '-verify-machineinstrs',
- f'-mtriple={triple}', f'-mattr=+{feat.mattr}', OptFlag, '|',
+ f'-mtriple={triple}', f'-mattr={feat.mattr}', OptFlag, '|',
'FileCheck', '%s', f'--check-prefixes=CHECK,{OptFlag}\n'
]))