[llvm] 84a6a05 - [AArch64][GlobalISel] Select G_UADDE/G_SADDE/G_USUBE/G_SSUBE
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Sun Jun 25 14:32:07 PDT 2023
Author: Tobias Stadler
Date: 2023-06-25T14:32:00-07:00
New Revision: 84a6a057e60be1f3f46cf1f02e9ba190dab805da
URL: https://github.com/llvm/llvm-project/commit/84a6a057e60be1f3f46cf1f02e9ba190dab805da
DIFF: https://github.com/llvm/llvm-project/commit/84a6a057e60be1f3f46cf1f02e9ba190dab805da.diff
LOG: [AArch64][GlobalISel] Select G_UADDE/G_SADDE/G_USUBE/G_SSUBE
This implements the remaining overflow-generating instructions in the AArch64
GlobalISel selector. Wide add/sub operations no longer fall back to
SelectionDAG. We use PostSelectOptimize to clean up the flag-setting
operations generated here when the carry-out is unused. Since we no longer
fall back when selecting add/sub atomics at -O0, some test changes were
required there.
Fixes: https://github.com/llvm/llvm-project/issues/59407
Differential Revision: https://reviews.llvm.org/D153164
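
For illustration: a wide add is legalized into a G_UADDO on the low halves
followed by a G_UADDE on the high halves, which this patch now selects as
ADDS + ADCS. Below is a minimal MachineIRBuilder sketch of that shape; the
helper name and registers are made up for the example, and the real sequence
is produced by the legalizer.

#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Build the G_UADDO + G_UADDE pair for (LHSHi:LHSLo) + (RHSHi:RHSLo).
// After this patch the pair selects to ADDS (sets C) followed by ADCS
// (consumes and re-sets C).
static void buildWideAdd(MachineIRBuilder &MIB, Register DstLo, Register DstHi,
                         Register LHSLo, Register LHSHi, Register RHSLo,
                         Register RHSHi) {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  LLT S1 = LLT::scalar(1);
  Register CarryLo = MRI.createGenericVirtualRegister(S1);
  Register CarryHi = MRI.createGenericVirtualRegister(S1);
  MIB.buildUAddo(DstLo, CarryLo, LHSLo, RHSLo);          // low half: G_UADDO
  MIB.buildUAdde(DstHi, CarryHi, LHSHi, RHSHi, CarryLo); // high half: G_UADDE
}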
Added:
llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
llvm/test/CodeGen/AArch64/popcount.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 049efa672f5be..8484d970aff0a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -258,6 +258,106 @@ class GFCmp : public GAnyCmp {
}
};
+/// Represents overflowing binary operations.
+/// Only carry-out:
+/// G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_UMULO, G_SMULO
+/// Carry-in and carry-out:
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GBinOpCarryOut : public GenericMachineInstr {
+public:
+ Register getDstReg() const { return getReg(0); }
+ Register getCarryOutReg() const { return getReg(1); }
+ MachineOperand &getLHS() { return getOperand(2); }
+ MachineOperand &getRHS() { return getOperand(3); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_UMULO:
+ case TargetOpcode::G_SMULO:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
+/// Represents overflowing add/sub operations.
+/// Only carry-out:
+/// G_UADDO, G_SADDO, G_USUBO, G_SSUBO
+/// Carry-in and carry-out:
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GAddSubCarryOut : public GBinOpCarryOut {
+public:
+ bool isAdd() const {
+ switch (getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ return true;
+ default:
+ return false;
+ }
+ }
+ bool isSub() const { return !isAdd(); }
+
+ bool isSigned() const {
+ switch (getOpcode()) {
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_SSUBE:
+ return true;
+ default:
+ return false;
+ }
+ }
+ bool isUnsigned() const { return !isSigned(); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_UADDO:
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_USUBO:
+ case TargetOpcode::G_SSUBO:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBE:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
+/// Represents overflowing add/sub operations that also consume a carry-in.
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GAddSubCarryInOut : public GAddSubCarryOut {
+public:
+ Register getCarryInReg() const { return getReg(4); }
+
+ static bool classof(const MachineInstr *MI) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_USUBE:
+ case TargetOpcode::G_SSUBE:
+ return true;
+ default:
+ return false;
+ }
+ }
+};
+
} // namespace llvm
#endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
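
The new wrappers are meant to be queried through the usual isa/dyn_cast
machinery rather than by switching on opcodes. A minimal usage sketch (the
helper name is made up for the example):

#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

// Hypothetical helper: true if MI is an add/sub-with-overflow whose carry-out
// has no non-debug users. dyn_cast relies on the classof() hooks above.
static bool hasDeadCarryOut(const MachineInstr &MI,
                            const MachineRegisterInfo &MRI) {
  if (const auto *CarryMI = dyn_cast<GAddSubCarryOut>(&MI))
    return MRI.use_nodbg_empty(CarryMI->getCarryOutReg());
  return false;
}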
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 29c9979c7533b..d7d2085289c6d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -269,6 +269,10 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
+ MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const;
MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
@@ -289,6 +293,11 @@ class AArch64InstructionSelector : public InstructionSelector {
MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
MachineIRBuilder &MIRBuilder) const;
+ /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
+ /// Might elide the instruction if the previous instruction already sets NZCV
+ /// correctly.
+ MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
+
/// Emit the overflow op for \p Opcode.
///
/// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
@@ -297,6 +306,8 @@ class AArch64InstructionSelector : public InstructionSelector {
emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
+ bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
+
/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
/// In some cases this is even possible with OR operations in the expression.
MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
@@ -3081,24 +3092,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
I.eraseFromParent();
return true;
}
+
+ case TargetOpcode::G_SADDE:
+ case TargetOpcode::G_UADDE:
+ case TargetOpcode::G_SSUBE:
+ case TargetOpcode::G_USUBE:
case TargetOpcode::G_SADDO:
case TargetOpcode::G_UADDO:
case TargetOpcode::G_SSUBO:
- case TargetOpcode::G_USUBO: {
- // Emit the operation and get the correct condition code.
- auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
- I.getOperand(2), I.getOperand(3), MIB);
-
- // Now, put the overflow result in the register given by the first operand
- // to the overflow op. CSINC increments the result when the predicate is
- // false, so to get the increment when it's true, we need to use the
- // inverse. In this case, we want to increment when carry is set.
- Register ZReg = AArch64::WZR;
- emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
- getInvertedCondCode(OpAndCC.second), MIB);
- I.eraseFromParent();
- return true;
- }
+ case TargetOpcode::G_USUBO:
+ return selectOverflowOp(I, MRI);
case TargetOpcode::G_PTRMASK: {
Register MaskReg = I.getOperand(2).getReg();
@@ -4555,6 +4558,28 @@ AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
}
+MachineInstr *
+AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
+ static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
+ return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
+ MachineOperand &RHS,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+ bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
+ static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
+ return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
+}
+
MachineInstr *
AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
MachineIRBuilder &MIRBuilder) const {
@@ -4761,6 +4786,72 @@ AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
return &*CSINC;
}
+MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
+ Register CarryReg) {
+ MachineRegisterInfo *MRI = MIB.getMRI();
+ unsigned Opcode = I.getOpcode();
+
+ // If the instruction is a SUB, we need to negate the carry,
+ // because borrowing is indicated by carry-flag == 0.
+ bool NeedsNegatedCarry =
+ (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
+
+ // If the previous instruction will already produce the correct carry, do not
+ // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
+ // generated during legalization of wide add/sub. This optimization depends on
+ // these sequences not being interrupted by other instructions.
+ MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
+ if (SrcMI == I.getPrevNode()) {
+ if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
+ bool ProducesNegatedCarry = CarrySrcMI->isSub();
+ if (NeedsNegatedCarry == ProducesNegatedCarry && CarrySrcMI->isUnsigned())
+ return nullptr;
+ }
+ }
+
+ Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
+
+ if (NeedsNegatedCarry) {
+ // (0 - Carry) sets !C in NZCV when Carry == 1
+ Register ZReg = AArch64::WZR;
+ return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
+ }
+
+ // (Carry - 1) sets !C in NZCV when Carry == 0
+ auto Fns = select12BitValueWithLeftShift(1);
+ return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
+}
+
+bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
+ MachineRegisterInfo &MRI) {
+ auto &CarryMI = cast<GAddSubCarryOut>(I);
+
+ if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
+ // Set NZCV carry according to carry-in VReg
+ emitCarryIn(I, CarryInMI->getCarryInReg());
+ }
+
+ // Emit the operation and get the correct condition code.
+ auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
+ CarryMI.getLHS(), CarryMI.getRHS(), MIB);
+
+ Register CarryOutReg = CarryMI.getCarryOutReg();
+
+ // Don't convert carry-out to VReg if it is never used
+ if (!MRI.use_nodbg_empty(CarryOutReg)) {
+ // Now, put the overflow result in the register given by the first operand
+ // to the overflow op. CSINC increments the result when the predicate is
+ // false, so to get the increment when it's true, we need to use the
+ // inverse. In this case, we want to increment when carry is set.
+ Register ZReg = AArch64::WZR;
+ emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
+ getInvertedCondCode(OpAndCC.second), MIB);
+ }
+
+ I.eraseFromParent();
+ return true;
+}
+
std::pair<MachineInstr *, AArch64CC::CondCode>
AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
MachineOperand &LHS,
@@ -4777,6 +4868,14 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
case TargetOpcode::G_USUBO:
return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+ case TargetOpcode::G_SADDE:
+ return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_UADDE:
+ return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+ case TargetOpcode::G_SSUBE:
+ return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+ case TargetOpcode::G_USUBE:
+ return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
}
}
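
A note on the two SUBS forms used by emitCarryIn above: AArch64 SUBS sets the
C flag exactly when the subtraction does not borrow, so (Carry - 1) reproduces
the carry expected by ADCS, while (0 - Carry) produces the inverted carry
(borrow) expected by SBCS. A tiny standalone check of those identities (plain
C++, not LLVM code):

#include <cassert>
#include <cstdint>

// C flag of an AArch64 SUBS: set iff the unsigned subtraction does not borrow.
static bool subsCarryFlag(uint32_t Lhs, uint32_t Rhs) { return Lhs >= Rhs; }

int main() {
  const uint32_t Carries[] = {0, 1};
  for (uint32_t Carry : Carries) {
    // ADCS wants C == carry-in: SUBS of (Carry - 1) yields C == Carry.
    assert(subsCarryFlag(Carry, 1) == (Carry == 1));
    // SBCS wants C == !borrow-in: SUBS of (0 - Carry) yields C == !Carry.
    assert(subsCarryFlag(0, Carry) == (Carry == 0));
  }
  return 0;
}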
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 670a16209705c..94584e20f5ab3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -73,10 +73,32 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
return AArch64::SUBWrr;
case AArch64::SUBSXrs:
return AArch64::SUBXrs;
+ case AArch64::SUBSWrs:
+ return AArch64::SUBWrs;
case AArch64::SUBSXri:
return AArch64::SUBXri;
case AArch64::SUBSWri:
return AArch64::SUBWri;
+ case AArch64::ADDSXrr:
+ return AArch64::ADDXrr;
+ case AArch64::ADDSWrr:
+ return AArch64::ADDWrr;
+ case AArch64::ADDSXrs:
+ return AArch64::ADDXrs;
+ case AArch64::ADDSWrs:
+ return AArch64::ADDWrs;
+ case AArch64::ADDSXri:
+ return AArch64::ADDXri;
+ case AArch64::ADDSWri:
+ return AArch64::ADDWri;
+ case AArch64::SBCSXr:
+ return AArch64::SBCXr;
+ case AArch64::SBCSWr:
+ return AArch64::SBCWr;
+ case AArch64::ADCSXr:
+ return AArch64::ADCXr;
+ case AArch64::ADCSWr:
+ return AArch64::ADCWr;
}
}
@@ -137,6 +159,12 @@ bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
}
bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
+ // If we find a dead NZCV implicit-def, we
+ // - try to convert the operation to a non-flag-setting equivalent
+ // - or mark the def as dead to aid later peephole optimizations.
+
+ // Use cases:
+ // 1)
// Consider the following code:
// FCMPSrr %0, %1, implicit-def $nzcv
// %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
@@ -153,8 +181,11 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
// in between the two FCMPs. In this case, the SUBS defines NZCV
// but it doesn't have any users, being overwritten by the second FCMP.
//
- // Our solution here is to try to convert flag setting operations between
- // a interval of identical FCMPs, so that CSE will be able to eliminate one.
+ // 2)
+ // The instruction selector always emits the flag-setting variant of ADC/SBC
+ // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these
+ // instructions is never used, we can switch to the non-flag-setting variant.
+
bool Changed = false;
auto &MF = *MBB.getParent();
auto &Subtarget = MF.getSubtarget();
@@ -163,52 +194,20 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
auto RBI = Subtarget.getRegBankInfo();
auto &MRI = MF.getRegInfo();
- // The first step is to find the first and last FCMPs. If we have found
- // at least two, then set the limit of the bottom-up walk to the first FCMP
- // found since we're only interested in dealing with instructions between
- // them.
- MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
- for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
- if (MI.getOpcode() == AArch64::FCMPSrr ||
- MI.getOpcode() == AArch64::FCMPDrr) {
- if (!FirstCmp)
- FirstCmp = &MI;
- else
- LastCmp = &MI;
- }
- }
-
- // In addition to converting flag-setting ops in fcmp ranges into non-flag
- // setting ops, across the whole basic block we also detect when nzcv
- // implicit-defs are dead, and mark them as dead. Peephole optimizations need
- // this information later.
-
LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
LRU.addLiveOuts(MBB);
- bool NZCVDead = LRU.available(AArch64::NZCV);
- bool InsideCmpRange = false;
- for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
- LRU.stepBackward(II);
-
- if (LastCmp) { // There's a range present in this block.
- // If we're inside an fcmp range, look for begin instruction.
- if (InsideCmpRange && &II == FirstCmp)
- InsideCmpRange = false;
- else if (&II == LastCmp)
- InsideCmpRange = true;
- }
- // Did this instruction define NZCV?
- bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
- if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
- // If we have a def and NZCV is dead, then we may convert this op.
+ for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
+ bool NZCVDead = LRU.available(AArch64::NZCV);
+ if (NZCVDead && II.definesRegister(AArch64::NZCV)) {
+ // The instruction defines NZCV, but NZCV is dead.
unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
if (DeadNZCVIdx != -1) {
- // If we're inside an fcmp range, then convert flag setting ops.
- if (InsideCmpRange && NewOpc) {
+ if (NewOpc) {
+ // If there is an equivalent non-flag-setting op, we convert.
LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
- "op in fcmp range: "
+ "op: "
<< II);
II.setDesc(TII->get(NewOpc));
II.removeOperand(DeadNZCVIdx);
@@ -225,8 +224,7 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
}
}
}
-
- NZCVDead = NZCVDeadAtCurrInstr;
+ LRU.stepBackward(II);
}
return Changed;
}
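
For reference, the per-instruction rewrite performed when an NZCV def turns
out to be dead boils down to the following sketch (illustrative only; it
assumes getNonFlagSettingVariant and the includes of the pass above, and the
helper name is made up):

// Either switch to the non-flag-setting opcode (e.g. ADCSXr -> ADCXr), or
// mark the dead NZCV def so later peepholes can take advantage of it.
static bool rewriteDeadNZCVDef(MachineInstr &MI, const TargetInstrInfo &TII) {
  int DeadNZCVIdx = MI.findRegisterDefOperandIdx(AArch64::NZCV);
  if (DeadNZCVIdx == -1)
    return false;
  if (unsigned NewOpc = getNonFlagSettingVariant(MI.getOpcode())) {
    MI.setDesc(TII.get(NewOpc));
    MI.removeOperand(DeadNZCVIdx);
    return true;
  }
  MI.getOperand(DeadNZCVIdx).setIsDead();
  return false;
}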
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index f94af34076e79..6158e7d7a8a81 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index a020e1327fcb8..e04660449b098 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -510,10 +510,14 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
@@ -527,10 +531,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
@@ -544,10 +552,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
@@ -561,10 +573,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
@@ -578,10 +594,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
@@ -810,7 +830,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -823,7 +845,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -836,7 +860,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -849,7 +875,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -862,7 +890,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1015,10 +1045,13 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
@@ -1032,10 +1065,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
@@ -1049,10 +1085,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
@@ -1066,10 +1105,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
@@ -1083,10 +1125,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
@@ -1315,7 +1360,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1328,7 +1374,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1341,7 +1388,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1354,7 +1402,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1367,7 +1416,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index 16bf4c269088c..cb8e7530dedf0 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -555,10 +555,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x2, x0, x9
-; -O0: bl __aarch64_cas16_relax
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -570,10 +578,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x2, x0, x9
-; -O0: bl __aarch64_cas16_acq
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -585,10 +601,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x2, x0, x9
-; -O0: bl __aarch64_cas16_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -600,10 +624,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x2, x0, x9
-; -O0: bl __aarch64_cas16_acq_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -615,10 +647,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x2, x0, x9
-; -O0: bl __aarch64_cas16_acq_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -845,7 +885,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -858,7 +900,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -871,7 +915,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -884,7 +930,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -897,7 +945,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1130,10 +1180,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x2, x0, x9
-; -O0: bl __aarch64_cas16_relax
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -1145,10 +1202,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x2, x0, x9
-; -O0: bl __aarch64_cas16_acq
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -1160,10 +1224,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x2, x0, x9
-; -O0: bl __aarch64_cas16_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -1175,10 +1246,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x2, x0, x9
-; -O0: bl __aarch64_cas16_acq_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1190,10 +1268,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x2, x0, x9
-; -O0: bl __aarch64_cas16_acq_rel
-; -O0: subs x10, x10, x11
-; -O0: ccmp x8, x9, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -1440,7 +1525,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1453,7 +1539,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1466,7 +1553,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1479,7 +1567,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1492,7 +1581,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
index 3a150acbf5aa3..987c0640c0f4a 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 5004db8d26560..25305e056d0b3 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index 40a055e505901..296dcf4052323 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -545,10 +545,14 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
@@ -562,10 +566,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
@@ -579,10 +587,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
@@ -596,10 +608,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
@@ -613,10 +629,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x2, x10, x12
+; -O0: adds x2, x9, x11
+; -O0: and w11, w9, #0x1
+; -O0: subs w11, w11, #1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
@@ -845,7 +865,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -858,7 +880,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -871,7 +895,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -884,7 +910,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -897,7 +925,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1050,10 +1080,13 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: casp x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldp x4, x5, [x0]
@@ -1067,10 +1100,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspa x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldp x4, x5, [x0]
@@ -1084,10 +1120,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspl x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldp x4, x5, [x0]
@@ -1101,10 +1140,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldp x4, x5, [x0]
@@ -1118,10 +1160,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x2, x10, x12
+; -O0: subs x2, x9, x11
+; -O0: and w11, w9, #0x1
; -O0: caspal x0, x1, x2, x3, [x8]
-; -O0: subs x11, x8, x11
-; -O0: ccmp x9, x10, #0, eq
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldp x4, x5, [x0]
@@ -1350,7 +1395,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1363,7 +1409,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1376,7 +1423,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1389,7 +1437,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1402,7 +1451,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
index c9e0ff0f5d497..f57d8006f7e8b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0: adds x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0: adds x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: adds x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: subs w10, w10, #1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0: adds x8, x11, x8
+; -O0: adds x9, x8, x9
+; -O0: and w11, w8, #0x1
+; -O0: subs w11, w11, #1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
; -O1: ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stxp w8, x14, x15, [x9]
-; -O0: stxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stxp w8, x14, x15, [x11]
+; -O0: stxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
; -O1: ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0: subs x14, x11, x10
-; -O0: ldxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
; -O1: ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
; -O1: ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0: subs x14, x11, x10
-; -O0: ldaxp x10, x12, [x9]
-; -O0: cmp x10, x11
-; -O0: cmp x12, x13
-; -O0: stlxp w8, x14, x15, [x9]
-; -O0: stlxp w8, x10, x12, [x9]
-; -O0: subs x12, x12, x13
-; -O0: ccmp x10, x11, #0, eq
+; -O0: subs x14, x8, x10
+; -O0: and w10, w8, #0x1
+; -O0: ldaxp x10, x9, [x11]
+; -O0: cmp x10, x12
+; -O0: cmp x9, x13
+; -O0: stlxp w8, x14, x15, [x11]
+; -O0: stlxp w8, x10, x9, [x11]
+; -O0: eor x8, x10, x8
+; -O0: eor x11, x9, x11
+; -O0: orr x8, x8, x11
+; -O0: subs x8, x8, #0
;
; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
; -O1: ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0: subs x8, x11, x8
+; -O0: subs x9, x8, x9
+; -O0: and w11, w8, #0x1
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
index 4bb5842a4e5b3..e54a8c076fb61 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
@@ -16,19 +16,20 @@ body: |
; CHECK-LABEL: name: test_fcmp_dead_cc
; CHECK: liveins: $w1, $x0, $s0, $s1
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
- ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
- ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
- ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
- ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
- ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
- ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
- ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSELWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%1:gpr64 = COPY $x0
%2:gpr32 = COPY $w1
%3:fpr32 = COPY $s0
@@ -60,19 +61,20 @@ body: |
; CHECK-LABEL: name: test_fcmp_64_dead_cc
; CHECK: liveins: $w1, $x0, $d0, $d1
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
- ; CHECK: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1
- ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
- ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
- ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
- ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
- ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
- ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
- ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSELWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%1:gpr64 = COPY $x0
%2:gpr32 = COPY $w1
%3:fpr64 = COPY $d0
@@ -104,21 +106,22 @@ body: |
; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps
; CHECK: liveins: $w1, $x0, $s0, $s1
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
- ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
- ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
- ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
- ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
- ; CHECK: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
- ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
- ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31
- ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
- ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
- ; CHECK: $w0 = COPY [[CSELWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%1:gpr64 = COPY $x0
%2:gpr32 = COPY $w1
%3:fpr32 = COPY $s0
@@ -137,45 +140,226 @@ body: |
...
---
-name: test_impdef_made_dead
+name: test_impdef_subsx
alignment: 4
legalized: true
regBankSelected: true
selected: true
tracksRegLiveness: true
-liveins:
- - { reg: '$x0' }
- - { reg: '$w1' }
body: |
bb.1:
- liveins: $w1, $x0, $s0, $s1
- ; Check that any dead imp-defs of NZCV are marked as such.
- ; CHECK-LABEL: name: test_impdef_made_dead
- ; CHECK: liveins: $w1, $x0, $s0, $s1
- ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
- ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
- ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
- ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
- ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
- ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY4]], implicit-def dead $nzcv
- ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
- ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31
- ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
- ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
- ; CHECK: %ret:gpr32 = SUBSWrr [[CSELWr]], [[SUBSWrr]], implicit-def dead $nzcv
- ; CHECK: $w0 = COPY [[CSELWr]]
- ; CHECK: RET_ReallyLR implicit $w0
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_impdef_subsx
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x0 = COPY [[SUBXrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%1:gpr64 = COPY $x0
- %2:gpr32 = COPY $w1
- %3:fpr32 = COPY $s0
- %4:fpr32 = COPY $s1
- %26:gpr32 = COPY $wzr
- %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
- FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
- %14:gpr32common = UBFMWri %12, 1, 31
- %60:gpr32 = MOVi32imm 1
- %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
- %ret:gpr32 = SUBSWrr %16, %12, implicit-def $nzcv
- $w0 = COPY %16
+ %2:gpr64 = COPY $x1
+ %4:gpr64 = SUBSXrr %1, %2, implicit-def $nzcv
+ $x0 = COPY %4
+ RET_ReallyLR implicit $x0
+...
+---
+name: test_impdef_subsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: test_impdef_subsw
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $x1
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[SUBWrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr32 = COPY $x0
+ %2:gpr32 = COPY $x1
+ %4:gpr32 = SUBSWrr %1, %2, implicit-def $nzcv
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+...
+---
+name: test_impdef_addsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_impdef_addsx
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x0 = COPY [[ADDXrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %4:gpr64 = ADDSXrr %1, %2, implicit-def $nzcv
+ $x0 = COPY %4
+ RET_ReallyLR implicit $x0
+...
+---
+name: test_impdef_addsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: test_impdef_addsw
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $x1
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr32 = COPY $x0
+ %2:gpr32 = COPY $x1
+ %4:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv
+ $w0 = COPY %4
RET_ReallyLR implicit $w0
...
+---
+name: test_impdef_adcsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+ ; CHECK-LABEL: name: test_impdef_adcsx
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCXr:%[0-9]+]]:gpr64 = ADCXr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %3:gpr64 = COPY $x2
+ %4:gpr64 = COPY $x3
+ %5:gpr64 = ADDSXrr %1, %3, implicit-def $nzcv
+ %6:gpr64 = ADCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $x0 = COPY %5
+ $x1 = COPY %6
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name: test_impdef_adcsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+ ; CHECK-LABEL: name: test_impdef_adcsw
+ ; CHECK: liveins: $w0, $w1, $w2, $w3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+ ; CHECK-NEXT: [[ADDSWrr:%[0-9]+]]:gpr32 = ADDSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCWr:%[0-9]+]]:gpr32 = ADCWr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[ADDSWrr]]
+ ; CHECK-NEXT: $w1 = COPY [[ADCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY $w1
+ %3:gpr32 = COPY $w2
+ %4:gpr32 = COPY $w3
+ %5:gpr32 = ADDSWrr %1, %3, implicit-def $nzcv
+ %6:gpr32 = ADCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $w0 = COPY %5
+ $w1 = COPY %6
+ RET_ReallyLR implicit $w0, implicit $w1
+...
+---
+name: test_impdef_sbcsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+ ; CHECK-LABEL: name: test_impdef_sbcsx
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCXr:%[0-9]+]]:gpr64 = SBCXr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %3:gpr64 = COPY $x2
+ %4:gpr64 = COPY $x3
+ %5:gpr64 = SUBSXrr %1, %3, implicit-def $nzcv
+ %6:gpr64 = SBCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $x0 = COPY %5
+ $x1 = COPY %6
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name: test_impdef_sbcsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+ ; CHECK-LABEL: name: test_impdef_sbcsw
+ ; CHECK: liveins: $w0, $w1, $w2, $w3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCWr:%[0-9]+]]:gpr32 = SBCWr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]]
+ ; CHECK-NEXT: $w1 = COPY [[SBCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY $w1
+ %3:gpr32 = COPY $w2
+ %4:gpr32 = COPY $w3
+ %5:gpr32 = SUBSWrr %1, %3, implicit-def $nzcv
+ %6:gpr32 = SBCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $w0 = COPY %5
+ $w1 = COPY %6
+ RET_ReallyLR implicit $w0, implicit $w1
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
new file mode 100644
index 0000000000000..e54a8c076fb61
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
@@ -0,0 +1,365 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: test_fcmp_dead_cc
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+body: |
+ bb.1:
+ liveins: $w1, $x0, $s0, $s1
+
+ ; CHECK-LABEL: name: test_fcmp_dead_cc
+ ; CHECK: liveins: $w1, $x0, $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr64 = COPY $x0
+ %2:gpr32 = COPY $w1
+ %3:fpr32 = COPY $s0
+ %4:fpr32 = COPY $s1
+ %26:gpr32 = COPY $wzr
+ FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+ FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %14:gpr32common = UBFMWri %12, 1, 31
+ %60:gpr32 = MOVi32imm 1
+ %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
+ $w0 = COPY %16
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_fcmp_64_dead_cc
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+body: |
+ bb.1:
+ liveins: $w1, $x0, $d0, $d1
+
+ ; CHECK-LABEL: name: test_fcmp_64_dead_cc
+ ; CHECK: liveins: $w1, $x0, $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr64 = COPY $x0
+ %2:gpr32 = COPY $w1
+ %3:fpr64 = COPY $d0
+ %4:fpr64 = COPY $d1
+ %26:gpr32 = COPY $wzr
+ FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+ FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %14:gpr32common = UBFMWri %12, 1, 31
+ %60:gpr32 = MOVi32imm 1
+ %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
+ $w0 = COPY %16
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_fcmp_dead_cc_3_fcmps
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+body: |
+ bb.1:
+ liveins: $w1, $x0, $s0, $s1
+
+ ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps
+ ; CHECK: liveins: $w1, $x0, $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+ ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+ ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr64 = COPY $x0
+ %2:gpr32 = COPY $w1
+ %3:fpr32 = COPY $s0
+ %4:fpr32 = COPY $s1
+ %26:gpr32 = COPY $wzr
+ FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+ FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+ FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+ %14:gpr32common = UBFMWri %12, 1, 31
+ %60:gpr32 = MOVi32imm 1
+ %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
+ $w0 = COPY %16
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_impdef_subsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_impdef_subsx
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x0 = COPY [[SUBXrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %4:gpr64 = SUBSXrr %1, %2, implicit-def $nzcv
+ $x0 = COPY %4
+ RET_ReallyLR implicit $x0
+...
+---
+name: test_impdef_subsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: test_impdef_subsw
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $x1
+ ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[SUBWrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr32 = COPY $x0
+ %2:gpr32 = COPY $x1
+ %4:gpr32 = SUBSWrr %1, %2, implicit-def $nzcv
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+...
+---
+name: test_impdef_addsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+ ; CHECK-LABEL: name: test_impdef_addsx
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $x0 = COPY [[ADDXrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %4:gpr64 = ADDSXrr %1, %2, implicit-def $nzcv
+ $x0 = COPY %4
+ RET_ReallyLR implicit $x0
+...
+---
+name: test_impdef_addsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+ ; CHECK-LABEL: name: test_impdef_addsw
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $x1
+ ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]]
+ ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:gpr32 = COPY $x0
+ %2:gpr32 = COPY $x1
+ %4:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+...
+---
+name: test_impdef_adcsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+ ; CHECK-LABEL: name: test_impdef_adcsx
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCXr:%[0-9]+]]:gpr64 = ADCXr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %3:gpr64 = COPY $x2
+ %4:gpr64 = COPY $x3
+ %5:gpr64 = ADDSXrr %1, %3, implicit-def $nzcv
+ %6:gpr64 = ADCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $x0 = COPY %5
+ $x1 = COPY %6
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name: test_impdef_adcsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+ ; CHECK-LABEL: name: test_impdef_adcsw
+ ; CHECK: liveins: $w0, $w1, $w2, $w3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+ ; CHECK-NEXT: [[ADDSWrr:%[0-9]+]]:gpr32 = ADDSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCWr:%[0-9]+]]:gpr32 = ADCWr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[ADDSWrr]]
+ ; CHECK-NEXT: $w1 = COPY [[ADCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY $w1
+ %3:gpr32 = COPY $w2
+ %4:gpr32 = COPY $w3
+ %5:gpr32 = ADDSWrr %1, %3, implicit-def $nzcv
+ %6:gpr32 = ADCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $w0 = COPY %5
+ $w1 = COPY %6
+ RET_ReallyLR implicit $w0, implicit $w1
+...
+---
+name: test_impdef_sbcsx
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+ ; CHECK-LABEL: name: test_impdef_sbcsx
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCXr:%[0-9]+]]:gpr64 = SBCXr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %1:gpr64 = COPY $x0
+ %2:gpr64 = COPY $x1
+ %3:gpr64 = COPY $x2
+ %4:gpr64 = COPY $x3
+ %5:gpr64 = SUBSXrr %1, %3, implicit-def $nzcv
+ %6:gpr64 = SBCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $x0 = COPY %5
+ $x1 = COPY %6
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name: test_impdef_sbcsw
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1, $w2, $w3
+ ; CHECK-LABEL: name: test_impdef_sbcsw
+ ; CHECK: liveins: $w0, $w1, $w2, $w3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCWr:%[0-9]+]]:gpr32 = SBCWr [[COPY1]], [[COPY3]], implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]]
+ ; CHECK-NEXT: $w1 = COPY [[SBCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %1:gpr32 = COPY $w0
+ %2:gpr32 = COPY $w1
+ %3:gpr32 = COPY $w2
+ %4:gpr32 = COPY $w3
+ %5:gpr32 = SUBSWrr %1, %3, implicit-def $nzcv
+ %6:gpr32 = SBCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+ $w0 = COPY %5
+ $w1 = COPY %6
+ RET_ReallyLR implicit $w0, implicit $w1
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
new file mode 100644
index 0000000000000..85625ced4ba69
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name: sadde_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: sadde_s64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s64), %4:gpr(s32) = G_SADDE %0, %1, %2
+ $x0 = COPY %3(s64)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name: sadde_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: sadde_s32
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32 = ADCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[ADCSWr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %0:gpr(s32) = COPY $w0
+ %1:gpr(s32) = COPY $w1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32), %4:gpr(s32) = G_SADDE %0, %1, %2
+ $w0 = COPY %3(s32)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name: sadde_opt_prev_uaddo
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: sadde_opt_prev_uaddo
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %8:gpr(s64), %12:gpr(s32) = G_UADDO %0, %2
+ %9:gpr(s64), %13:gpr(s32) = G_SADDE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: sadde_opt_prev_uadde
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: sadde_opt_prev_uadde
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %6:gpr(s32) = G_CONSTANT i32 1
+ %8:gpr(s64), %12:gpr(s32) = G_UADDE %0, %2, %6
+ %9:gpr(s64), %13:gpr(s32) = G_SADDE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: sadde_opt_bail_clobber
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+ ; CHECK-LABEL: name: sadde_opt_bail_clobber
+ ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[CSINCWr]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: $x2 = COPY [[ADCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %4:gpr(s64) = COPY $x4
+ %5:gpr(s64) = COPY $x5
+ %6:gpr(s64) = COPY $x6
+ %7:gpr(s64), %11:gpr(s32) = G_UADDO %0, %4
+ %8:gpr(s64), %12:gpr(s32) = G_UADDE %1, %5, %11
+ ; carry-in is not produced by previous instruction
+ %9:gpr(s64), %13:gpr(s32) = G_SADDE %2, %6, %11
+ $x0 = COPY %7(s64)
+ $x1 = COPY %8(s64)
+ $x2 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
index 8742c7c68458b..42017e96e5a63 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
@@ -74,7 +74,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
@@ -101,7 +100,6 @@ body: |
; CHECK-NEXT: %copy1:gpr32 = COPY $w0
; CHECK-NEXT: %copy2:gpr32 = COPY $w1
; CHECK-NEXT: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy1:gpr(s32) = COPY $w0
@@ -129,7 +127,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
@@ -155,7 +152,6 @@ body: |
; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
; CHECK-NEXT: %reg1:gpr32 = COPY $w0
; CHECK-NEXT: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $x0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $x0
%reg0:gpr(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
new file mode 100644
index 0000000000000..00bd26cc0220d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name: ssube_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: ssube_s64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s64), %4:gpr(s32) = G_SSUBE %0, %1, %2
+ $x0 = COPY %3(s64)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name: ssube_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: ssube_s32
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32 = SBCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[SBCSWr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %0:gpr(s32) = COPY $w0
+ %1:gpr(s32) = COPY $w1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32), %4:gpr(s32) = G_SSUBE %0, %1, %2
+ $w0 = COPY %3(s32)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name: ssube_opt_prev_usubo
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: ssube_opt_prev_usubo
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %8:gpr(s64), %12:gpr(s32) = G_USUBO %0, %2
+ %9:gpr(s64), %13:gpr(s32) = G_SSUBE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: ssube_opt_prev_usube
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: ssube_opt_prev_usube
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %6:gpr(s32) = G_CONSTANT i32 1
+ %8:gpr(s64), %12:gpr(s32) = G_USUBE %0, %2, %6
+ %9:gpr(s64), %13:gpr(s32) = G_SSUBE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: ssube_opt_bail_clobber
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+ ; CHECK-LABEL: name: ssube_opt_bail_clobber
+ ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[CSINCWr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: $x2 = COPY [[SBCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %4:gpr(s64) = COPY $x4
+ %5:gpr(s64) = COPY $x5
+ %6:gpr(s64) = COPY $x6
+ %7:gpr(s64), %11:gpr(s32) = G_USUBO %0, %4
+ %8:gpr(s64), %12:gpr(s32) = G_USUBE %1, %5, %11
+ ; carry-in is not produced by previous instruction
+ %9:gpr(s64), %13:gpr(s32) = G_SSUBE %2, %6, %11
+ $x0 = COPY %7(s64)
+ $x1 = COPY %8(s64)
+ $x2 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
index 9055dd20d312c..5a47a4515930c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
@@ -74,7 +74,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
@@ -101,7 +100,6 @@ body: |
; CHECK-NEXT: %copy1:gpr32 = COPY $w0
; CHECK-NEXT: %copy2:gpr32 = COPY $w1
; CHECK-NEXT: %add:gpr32 = SUBSWrs %copy1, %copy2, 16, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy1:gpr(s32) = COPY $w0
@@ -129,7 +127,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
@@ -155,7 +152,6 @@ body: |
; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
; CHECK-NEXT: %reg1:gpr32 = COPY $w0
; CHECK-NEXT: %add:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
; CHECK-NEXT: $x0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $x0
%reg0:gpr(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
new file mode 100644
index 0000000000000..dc80d0c9abc25
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name: uadde_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: uadde_s64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s64), %4:gpr(s32) = G_UADDE %0, %1, %2
+ $x0 = COPY %3(s64)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name: uadde_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: uadde_s32
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32 = ADCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[ADCSWr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %0:gpr(s32) = COPY $w0
+ %1:gpr(s32) = COPY $w1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32), %4:gpr(s32) = G_UADDE %0, %1, %2
+ $w0 = COPY %3(s32)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name: uadde_opt_prev_uaddo
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: uadde_opt_prev_uaddo
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %8:gpr(s64), %12:gpr(s32) = G_UADDO %0, %2
+ %9:gpr(s64), %13:gpr(s32) = G_UADDE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: uadde_opt_prev_uadde
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: uadde_opt_prev_uadde
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %6:gpr(s32) = G_CONSTANT i32 1
+ %8:gpr(s64), %12:gpr(s32) = G_UADDE %0, %2, %6
+ %9:gpr(s64), %13:gpr(s32) = G_UADDE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: uadde_opt_bail_clobber
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+ ; CHECK-LABEL: name: uadde_opt_bail_clobber
+ ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+ ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[CSINCWr]], 1, 0, implicit-def $nzcv
+ ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+ ; CHECK-NEXT: $x2 = COPY [[ADCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %4:gpr(s64) = COPY $x4
+ %5:gpr(s64) = COPY $x5
+ %6:gpr(s64) = COPY $x6
+ %7:gpr(s64), %11:gpr(s32) = G_UADDO %0, %4
+ %8:gpr(s64), %12:gpr(s32) = G_UADDE %1, %5, %11
+ ; carry-in is not produced by previous instruction
+ %9:gpr(s64), %13:gpr(s32) = G_UADDE %2, %6, %11
+ $x0 = COPY %7(s64)
+ $x1 = COPY %8(s64)
+ $x2 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
index c325eb809eca2..55984b116e1ca 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -74,7 +74,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
@@ -101,7 +100,6 @@ body: |
; CHECK-NEXT: %copy1:gpr32 = COPY $w0
; CHECK-NEXT: %copy2:gpr32 = COPY $w1
; CHECK-NEXT: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy1:gpr(s32) = COPY $w0
@@ -129,7 +127,6 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
@@ -155,7 +152,6 @@ body: |
; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
; CHECK-NEXT: %reg1:gpr32 = COPY $w0
; CHECK-NEXT: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
; CHECK-NEXT: $x0 = COPY %add
; CHECK-NEXT: RET_ReallyLR implicit $x0
%reg0:gpr(s64) = COPY $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
new file mode 100644
index 0000000000000..c532474fc67b4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-uknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name: usube_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: usube_s64
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s64), %4:gpr(s32) = G_USUBE %0, %1, %2
+ $x0 = COPY %3(s64)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name: usube_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $w0, $w1
+
+ ; CHECK-LABEL: name: usube_s32
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32 = SBCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+ ; CHECK-NEXT: $w0 = COPY [[SBCSWr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ %0:gpr(s32) = COPY $w0
+ %1:gpr(s32) = COPY $w1
+ %2:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32), %4:gpr(s32) = G_USUBE %0, %1, %2
+ $w0 = COPY %3(s32)
+ $w1 = COPY %4(s32)
+ RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name: usube_opt_prev_usubo
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: usube_opt_prev_usubo
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %8:gpr(s64), %12:gpr(s32) = G_USUBO %0, %2
+ %9:gpr(s64), %13:gpr(s32) = G_USUBE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: usube_opt_prev_usube
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x3
+
+ ; CHECK-LABEL: name: usube_opt_prev_usube
+ ; CHECK: liveins: $x0, $x1, $x2, $x3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %3:gpr(s64) = COPY $x3
+ %6:gpr(s32) = G_CONSTANT i32 1
+ %8:gpr(s64), %12:gpr(s32) = G_USUBE %0, %2, %6
+ %9:gpr(s64), %13:gpr(s32) = G_USUBE %1, %3, %12
+ $x0 = COPY %8(s64)
+ $x1 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name: usube_opt_bail_clobber
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+ ; CHECK-LABEL: name: usube_opt_bail_clobber
+ ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+ ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+ ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+ ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[CSINCWr]], implicit-def $nzcv
+ ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+ ; CHECK-NEXT: $x2 = COPY [[SBCSXr1]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(s64) = COPY $x2
+ %4:gpr(s64) = COPY $x4
+ %5:gpr(s64) = COPY $x5
+ %6:gpr(s64) = COPY $x6
+ %7:gpr(s64), %11:gpr(s32) = G_USUBO %0, %4
+ %8:gpr(s64), %12:gpr(s32) = G_USUBE %1, %5, %11
+ ; carry-in is not produced by previous instruction
+ %9:gpr(s64), %13:gpr(s32) = G_USUBE %2, %6, %11
+ $x0 = COPY %7(s64)
+ $x1 = COPY %8(s64)
+ $x2 = COPY %9(s64)
+ RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
index 45e02b40812d1..1fad9d41aa404 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
@@ -19,13 +19,15 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
- ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
- ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
%0:gpr(s32) = COPY $w0
%1:gpr(s32) = COPY $w1
%3:gpr(s32), %4:gpr(s32) = G_USUBO %0, %1
- $w0 = COPY %4(s32)
- RET_ReallyLR implicit $w0
+ $w0 = COPY %3
+ $w1 = COPY %4
+ RET_ReallyLR implicit $w0, implicit $w1
...
---
@@ -45,13 +47,15 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
- ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
- ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+ ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
%0:gpr(s64) = COPY $x0
%1:gpr(s64) = COPY $x1
%3:gpr(s64), %4:gpr(s32) = G_USUBO %0, %1
- $w0 = COPY %4
- RET_ReallyLR implicit $w0
+ $x0 = COPY %3
+ $w1 = COPY %4
+ RET_ReallyLR implicit $x0, implicit $w1
...
---
@@ -70,16 +74,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
- ; CHECK-NEXT: $w1 = COPY %overflow
- ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
%constant:gpr(s32) = G_CONSTANT i32 16
%add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy, %constant
$w0 = COPY %add
- $w1 = COPY %overflow
- RET_ReallyLR implicit $w0, implicit $w1
+ RET_ReallyLR implicit $w0
...
---
@@ -99,18 +100,15 @@ body: |
; CHECK-NEXT: %copy1:gpr32 = COPY $w0
; CHECK-NEXT: %copy2:gpr32 = COPY $w1
; CHECK-NEXT: %add:gpr32 = SUBSWrs %copy1, %copy2, 16, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
- ; CHECK-NEXT: $w1 = COPY %overflow
- ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy1:gpr(s32) = COPY $w0
%copy2:gpr(s32) = COPY $w1
%constant:gpr(s32) = G_CONSTANT i32 16
%shift:gpr(s32) = G_SHL %copy2(s32), %constant(s32)
%add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy1, %shift
$w0 = COPY %add(s32)
- $w1 = COPY %overflow(s32)
- RET_ReallyLR implicit $w0, implicit $w1
+ RET_ReallyLR implicit $w0
...
---
@@ -129,16 +127,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: %copy:gpr32sp = COPY $w0
; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
- ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
; CHECK-NEXT: $w0 = COPY %add
- ; CHECK-NEXT: $w1 = COPY %overflow
- ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
%copy:gpr(s32) = COPY $w0
%constant:gpr(s32) = G_CONSTANT i32 -16
%add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy, %constant
$w0 = COPY %add(s32)
- $w1 = COPY %overflow(s32)
- RET_ReallyLR implicit $w0, implicit $w1
+ RET_ReallyLR implicit $w0
...
---
@@ -157,10 +152,8 @@ body: |
; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
; CHECK-NEXT: %reg1:gpr32 = COPY $w0
; CHECK-NEXT: %add:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
- ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
; CHECK-NEXT: $x0 = COPY %add
- ; CHECK-NEXT: $w1 = COPY %flags
- ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+ ; CHECK-NEXT: RET_ReallyLR implicit $x0
%reg0:gpr(s64) = COPY $x0
%reg1:gpr(s32) = COPY $w0
%ext:gpr(s64) = G_ZEXT %reg1(s32)
@@ -168,5 +161,4 @@ body: |
%shift:gpr(s64) = G_SHL %ext, %cst(s64)
%add:gpr(s64), %flags:gpr(s32) = G_USUBO %reg0, %shift
$x0 = COPY %add(s64)
- $w1 = COPY %flags
- RET_ReallyLR implicit $x0, implicit $w1
+ RET_ReallyLR implicit $x0
diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 1dd282a142193..0a3ee98f843c8 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -57,7 +57,6 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
; CHECK-NEXT: // kill: def $x10 killed $w10
; CHECK-NEXT: bfi x8, x10, #32, #32
; CHECK-NEXT: adds x8, x8, x9
-; CHECK-NEXT: cset w9, hs
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
Entry: