[llvm] 84a6a05 - [AArch64][GlobalISel] Select G_UADDE/G_SADDE/G_USUBE/G_SSUBE

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Sun Jun 25 14:32:07 PDT 2023


Author: Tobias Stadler
Date: 2023-06-25T14:32:00-07:00
New Revision: 84a6a057e60be1f3f46cf1f02e9ba190dab805da

URL: https://github.com/llvm/llvm-project/commit/84a6a057e60be1f3f46cf1f02e9ba190dab805da
DIFF: https://github.com/llvm/llvm-project/commit/84a6a057e60be1f3f46cf1f02e9ba190dab805da.diff

LOG: [AArch64][GlobalISel] Select G_UADDE/G_SADDE/G_USUBE/G_SSUBE

This implements the remaining overflow-generating instructions in the AArch64
GlobalISel selector, so wide add/sub operations no longer fall back to
SelectionDAG. We use PostSelectOptimize to clean up the flag-setting operations
generated here whenever the carry-out is unused. Since we no longer fall back
when selecting add/sub atomics at -O0, some test changes were required there.
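
To make the new selection flow concrete, here is a hand-written sketch (not
taken from the commit) of how a 128-bit add is legalized and then selected;
register names are illustrative:

    ; Legalized generic MIR: the wide add becomes a carry chain.
    %lo:_(s64), %c:_(s1)   = G_UADDO %a_lo, %b_lo
    %hi:_(s64), %ovf:_(s1) = G_UADDE %a_hi, %b_hi, %c

    ; Selected AArch64 MIR: ADDS feeds its carry into ADCS through NZCV.
    %lo:gpr64 = ADDSXrr %a_lo, %b_lo, implicit-def $nzcv
    %hi:gpr64 = ADCSXr %a_hi, %b_hi, implicit-def $nzcv, implicit $nzcv

If %ovf is never used, PostSelectOptimize later rewrites the trailing ADCSXr
into the non-flag-setting ADCXr.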

Fixes: https://github.com/llvm/llvm-project/issues/59407

Differential Revision: https://reviews.llvm.org/D153164

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
    llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
    llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
    llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
    llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
    llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
    llvm/test/CodeGen/AArch64/popcount.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index 049efa672f5be..8484d970aff0a 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -258,6 +258,106 @@ class GFCmp : public GAnyCmp {
   }
 };
 
+/// Represents overflowing binary operations.
+/// Only carry-out:
+/// G_UADDO, G_SADDO, G_USUBO, G_SSUBO, G_UMULO, G_SMULO
+/// Carry-in and carry-out:
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GBinOpCarryOut : public GenericMachineInstr {
+public:
+  Register getDstReg() const { return getReg(0); }
+  Register getCarryOutReg() const { return getReg(1); }
+  MachineOperand &getLHS() { return getOperand(2); }
+  MachineOperand &getRHS() { return getOperand(3); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_UADDO:
+    case TargetOpcode::G_SADDO:
+    case TargetOpcode::G_USUBO:
+    case TargetOpcode::G_SSUBO:
+    case TargetOpcode::G_UADDE:
+    case TargetOpcode::G_SADDE:
+    case TargetOpcode::G_USUBE:
+    case TargetOpcode::G_SSUBE:
+    case TargetOpcode::G_UMULO:
+    case TargetOpcode::G_SMULO:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
+/// Represents overflowing add/sub operations.
+/// Only carry-out:
+/// G_UADDO, G_SADDO, G_USUBO, G_SSUBO
+/// Carry-in and carry-out:
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GAddSubCarryOut : public GBinOpCarryOut {
+public:
+  bool isAdd() const {
+    switch (getOpcode()) {
+    case TargetOpcode::G_UADDO:
+    case TargetOpcode::G_SADDO:
+    case TargetOpcode::G_UADDE:
+    case TargetOpcode::G_SADDE:
+      return true;
+    default:
+      return false;
+    }
+  }
+  bool isSub() const { return !isAdd(); }
+
+  bool isSigned() const {
+    switch (getOpcode()) {
+    case TargetOpcode::G_SADDO:
+    case TargetOpcode::G_SSUBO:
+    case TargetOpcode::G_SADDE:
+    case TargetOpcode::G_SSUBE:
+      return true;
+    default:
+      return false;
+    }
+  }
+  bool isUnsigned() const { return !isSigned(); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_UADDO:
+    case TargetOpcode::G_SADDO:
+    case TargetOpcode::G_USUBO:
+    case TargetOpcode::G_SSUBO:
+    case TargetOpcode::G_UADDE:
+    case TargetOpcode::G_SADDE:
+    case TargetOpcode::G_USUBE:
+    case TargetOpcode::G_SSUBE:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
+/// Represents overflowing add/sub operations that also consume a carry-in.
+/// G_UADDE, G_SADDE, G_USUBE, G_SSUBE
+class GAddSubCarryInOut : public GAddSubCarryOut {
+public:
+  Register getCarryInReg() const { return getReg(4); }
+
+  static bool classof(const MachineInstr *MI) {
+    switch (MI->getOpcode()) {
+    case TargetOpcode::G_UADDE:
+    case TargetOpcode::G_SADDE:
+    case TargetOpcode::G_USUBE:
+    case TargetOpcode::G_SSUBE:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
 } // namespace llvm
 
 #endif // LLVM_CODEGEN_GLOBALISEL_GENERICMACHINEINSTRS_H
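
As an aside (not part of the patch), these wrapper classes are meant to be
used through LLVM's casting utilities; the classof overloads above let
dyn_cast do the opcode dispatch. A minimal sketch:

    // Sketch: pattern-matching overflow ops via the new wrappers.
    if (auto *CarryOp = dyn_cast<GAddSubCarryOut>(&MI)) {
      Register Dst = CarryOp->getDstReg();      // arithmetic result
      Register Ovf = CarryOp->getCarryOutReg(); // carry/overflow result
      bool Signed = CarryOp->isSigned();        // G_S* vs. G_U* variants
      if (auto *CarryInOp = dyn_cast<GAddSubCarryInOut>(CarryOp)) {
        // Only G_UADDE/G_SADDE/G_USUBE/G_SSUBE have this extra operand.
        Register CarryIn = CarryInOp->getCarryInReg();
        (void)CarryIn;
      }
      (void)Dst; (void)Ovf; (void)Signed;
    }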

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 29c9979c7533b..d7d2085289c6d 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -269,6 +269,10 @@ class AArch64InstructionSelector : public InstructionSelector {
                          MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                          MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+                         MachineIRBuilder &MIRBuilder) const;
+  MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
+                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
   MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
@@ -289,6 +293,11 @@ class AArch64InstructionSelector : public InstructionSelector {
   MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                 MachineIRBuilder &MIRBuilder) const;
 
+  /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
+  /// Might elide the instruction if the previous instruction already sets NZCV
+  /// correctly.
+  MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);
+
   /// Emit the overflow op for \p Opcode.
   ///
   /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
@@ -297,6 +306,8 @@ class AArch64InstructionSelector : public InstructionSelector {
   emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
 
+  bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI);
+
   /// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
   /// In some cases this is even possible with OR operations in the expression.
   MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC,
@@ -3081,24 +3092,16 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
     I.eraseFromParent();
     return true;
   }
+
+  case TargetOpcode::G_SADDE:
+  case TargetOpcode::G_UADDE:
+  case TargetOpcode::G_SSUBE:
+  case TargetOpcode::G_USUBE:
   case TargetOpcode::G_SADDO:
   case TargetOpcode::G_UADDO:
   case TargetOpcode::G_SSUBO:
-  case TargetOpcode::G_USUBO: {
-    // Emit the operation and get the correct condition code.
-    auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
-                                  I.getOperand(2), I.getOperand(3), MIB);
-
-    // Now, put the overflow result in the register given by the first operand
-    // to the overflow op. CSINC increments the result when the predicate is
-    // false, so to get the increment when it's true, we need to use the
-    // inverse. In this case, we want to increment when carry is set.
-    Register ZReg = AArch64::WZR;
-    emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
-              getInvertedCondCode(OpAndCC.second), MIB);
-    I.eraseFromParent();
-    return true;
-  }
+  case TargetOpcode::G_USUBO:
+    return selectOverflowOp(I, MRI);
 
   case TargetOpcode::G_PTRMASK: {
     Register MaskReg = I.getOperand(2).getReg();
@@ -4555,6 +4558,28 @@ AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
 }
 
+MachineInstr *
+AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS,
+                                     MachineOperand &RHS,
+                                     MachineIRBuilder &MIRBuilder) const {
+  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
+  static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr};
+  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
+}
+
+MachineInstr *
+AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS,
+                                     MachineOperand &RHS,
+                                     MachineIRBuilder &MIRBuilder) const {
+  assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
+  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+  bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32);
+  static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr};
+  return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder);
+}
+
 MachineInstr *
 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                                     MachineIRBuilder &MIRBuilder) const {
@@ -4761,6 +4786,72 @@ AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
   return &*CSINC;
 }
 
+MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
+                                                      Register CarryReg) {
+  MachineRegisterInfo *MRI = MIB.getMRI();
+  unsigned Opcode = I.getOpcode();
+
+  // If the instruction is a SUB, we need to negate the carry,
+  // because borrowing is indicated by carry-flag == 0.
+  bool NeedsNegatedCarry =
+      (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE);
+
+  // If the previous instruction will already produce the correct carry, do not
+  // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences
+  // generated during legalization of wide add/sub. This optimization depends on
+  // these sequences not being interrupted by other instructions.
+  MachineInstr *SrcMI = MRI->getVRegDef(CarryReg);
+  if (SrcMI == I.getPrevNode()) {
+    if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) {
+      bool ProducesNegatedCarry = CarrySrcMI->isSub();
+      if (NeedsNegatedCarry == ProducesNegatedCarry && CarrySrcMI->isUnsigned())
+        return nullptr;
+    }
+  }
+
+  Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass);
+
+  if (NeedsNegatedCarry) {
+    // (0 - Carry) sets !C in NZCV when Carry == 1
+    Register ZReg = AArch64::WZR;
+    return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB);
+  }
+
+  // (Carry - 1) sets !C in NZCV when Carry == 0
+  auto Fns = select12BitValueWithLeftShift(1);
+  return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns);
+}
+
+bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I,
+                                                  MachineRegisterInfo &MRI) {
+  auto &CarryMI = cast<GAddSubCarryOut>(I);
+
+  if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) {
+    // Set NZCV carry according to carry-in VReg
+    emitCarryIn(I, CarryInMI->getCarryInReg());
+  }
+
+  // Emit the operation and get the correct condition code.
+  auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(),
+                                CarryMI.getLHS(), CarryMI.getRHS(), MIB);
+
+  Register CarryOutReg = CarryMI.getCarryOutReg();
+
+  // Don't convert carry-out to VReg if it is never used
+  if (!MRI.use_nodbg_empty(CarryOutReg)) {
+    // Now, put the overflow result in the register given by the first operand
+    // to the overflow op. CSINC increments the result when the predicate is
+    // false, so to get the increment when it's true, we need to use the
+    // inverse. In this case, we want to increment when carry is set.
+    Register ZReg = AArch64::WZR;
+    emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
+              getInvertedCondCode(OpAndCC.second), MIB);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
 std::pair<MachineInstr *, AArch64CC::CondCode>
 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
                                            MachineOperand &LHS,
@@ -4777,6 +4868,14 @@ AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
   case TargetOpcode::G_USUBO:
     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
+  case TargetOpcode::G_SADDE:
+    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  case TargetOpcode::G_UADDE:
+    return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
+  case TargetOpcode::G_SSUBE:
+    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
+  case TargetOpcode::G_USUBE:
+    return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
   }
 }
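
The carry-in materialization in emitCarryIn above exploits AArch64's
inverted-borrow convention for subtraction (C == 1 means "no borrow"). As a
hand-written illustration, assuming the boolean carry-in lives in w0 as 0 or 1
(the patch actually writes the arithmetic result to a dead virtual register
rather than to wzr):

    ; G_UADDE/G_SADDE: C must equal the carry-in.
    ; w0 - 1 borrows only when w0 == 0, so C = w0.
    subs wzr, w0, #1
    adcs x2, x3, x4

    ; G_USUBE/G_SSUBE: borrowing is signalled by C == 0, so we need C = !w0.
    ; 0 - w0 borrows exactly when w0 == 1.
    subs wzr, wzr, w0
    sbcs x2, x3, x4

The "and wN, wM, #0x1" instructions in the -O0 test diffs below come from
truncating the stored carry to a single bit before it is consumed this way.
When the carry-in is produced by the immediately preceding unsigned carry op
of matching direction, the SUBS is elided entirely.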
 

diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
index 670a16209705c..94584e20f5ab3 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostSelectOptimize.cpp
@@ -73,10 +73,32 @@ unsigned getNonFlagSettingVariant(unsigned Opc) {
     return AArch64::SUBWrr;
   case AArch64::SUBSXrs:
     return AArch64::SUBXrs;
+  case AArch64::SUBSWrs:
+    return AArch64::SUBWrs;
   case AArch64::SUBSXri:
     return AArch64::SUBXri;
   case AArch64::SUBSWri:
     return AArch64::SUBWri;
+  case AArch64::ADDSXrr:
+    return AArch64::ADDXrr;
+  case AArch64::ADDSWrr:
+    return AArch64::ADDWrr;
+  case AArch64::ADDSXrs:
+    return AArch64::ADDXrs;
+  case AArch64::ADDSWrs:
+    return AArch64::ADDWrs;
+  case AArch64::ADDSXri:
+    return AArch64::ADDXri;
+  case AArch64::ADDSWri:
+    return AArch64::ADDWri;
+  case AArch64::SBCSXr:
+    return AArch64::SBCXr;
+  case AArch64::SBCSWr:
+    return AArch64::SBCWr;
+  case AArch64::ADCSXr:
+    return AArch64::ADCXr;
+  case AArch64::ADCSWr:
+    return AArch64::ADCWr;
   }
 }
 
@@ -137,6 +159,12 @@ bool AArch64PostSelectOptimize::foldSimpleCrossClassCopies(MachineInstr &MI) {
 }
 
 bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
+  // If we find a dead NZCV implicit-def, we
+  // - try to convert the operation to a non-flag-setting equivalent
+  // - or mark the def as dead to aid later peephole optimizations.
+
+  // Use cases:
+  // 1)
   // Consider the following code:
   //  FCMPSrr %0, %1, implicit-def $nzcv
   //  %sel1:gpr32 = CSELWr %_, %_, 12, implicit $nzcv
@@ -153,8 +181,11 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
   // in between the two FCMPs. In this case, the SUBS defines NZCV
   // but it doesn't have any users, being overwritten by the second FCMP.
   //
-  // Our solution here is to try to convert flag setting operations between
-  // a interval of identical FCMPs, so that CSE will be able to eliminate one.
+  // 2)
+  // The instruction selector always emits the flag-setting variant of ADC/SBC
+  // while selecting G_UADDE/G_SADDE/G_USUBE/G_SSUBE. If the carry-out of these
+  // instructions is never used, we can switch to the non-flag-setting variant.
+
   bool Changed = false;
   auto &MF = *MBB.getParent();
   auto &Subtarget = MF.getSubtarget();
@@ -163,52 +194,20 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
   auto RBI = Subtarget.getRegBankInfo();
   auto &MRI = MF.getRegInfo();
 
-  // The first step is to find the first and last FCMPs. If we have found
-  // at least two, then set the limit of the bottom-up walk to the first FCMP
-  // found since we're only interested in dealing with instructions between
-  // them.
-  MachineInstr *FirstCmp = nullptr, *LastCmp = nullptr;
-  for (auto &MI : instructionsWithoutDebug(MBB.begin(), MBB.end())) {
-    if (MI.getOpcode() == AArch64::FCMPSrr ||
-        MI.getOpcode() == AArch64::FCMPDrr) {
-      if (!FirstCmp)
-        FirstCmp = &MI;
-      else
-        LastCmp = &MI;
-    }
-  }
-
-  // In addition to converting flag-setting ops in fcmp ranges into non-flag
-  // setting ops, across the whole basic block we also detect when nzcv
-  // implicit-defs are dead, and mark them as dead. Peephole optimizations need
-  // this information later.
-
   LiveRegUnits LRU(*MBB.getParent()->getSubtarget().getRegisterInfo());
   LRU.addLiveOuts(MBB);
-  bool NZCVDead = LRU.available(AArch64::NZCV);
-  bool InsideCmpRange = false;
-  for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
-    LRU.stepBackward(II);
-
-    if (LastCmp) { // There's a range present in this block.
-      // If we're inside an fcmp range, look for begin instruction.
-      if (InsideCmpRange && &II == FirstCmp)
-        InsideCmpRange = false;
-      else if (&II == LastCmp)
-        InsideCmpRange = true;
-    }
 
-    // Did this instruction define NZCV?
-    bool NZCVDeadAtCurrInstr = LRU.available(AArch64::NZCV);
-    if (NZCVDead && NZCVDeadAtCurrInstr && II.definesRegister(AArch64::NZCV)) {
-      // If we have a def and NZCV is dead, then we may convert this op.
+  for (auto &II : instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) {
+    bool NZCVDead = LRU.available(AArch64::NZCV);
+    if (NZCVDead && II.definesRegister(AArch64::NZCV)) {
+      // The instruction defines NZCV, but NZCV is dead.
       unsigned NewOpc = getNonFlagSettingVariant(II.getOpcode());
       int DeadNZCVIdx = II.findRegisterDefOperandIdx(AArch64::NZCV);
       if (DeadNZCVIdx != -1) {
-        // If we're inside an fcmp range, then convert flag setting ops.
-        if (InsideCmpRange && NewOpc) {
+        if (NewOpc) {
+          // If there is an equivalent non-flag-setting op, we convert.
           LLVM_DEBUG(dbgs() << "Post-select optimizer: converting flag-setting "
-                               "op in fcmp range: "
+                               "op: "
                             << II);
           II.setDesc(TII->get(NewOpc));
           II.removeOperand(DeadNZCVIdx);
@@ -225,8 +224,7 @@ bool AArch64PostSelectOptimize::optimizeNZCVDefs(MachineBasicBlock &MBB) {
         }
       }
     }
-
-    NZCVDead = NZCVDeadAtCurrInstr;
+    LRU.stepBackward(II);
   }
   return Changed;
 }
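
For illustration (hand-written, not from the patch), the effect of the
extended optimizeNZCVDefs on a selected wide add whose final carry-out is
unused:

    ; Before: the trailing ADCS defines NZCV, but nothing reads that def.
    %2:gpr64 = ADDSXrr %0, %1, implicit-def $nzcv
    %5:gpr64 = ADCSXr %3, %4, implicit-def $nzcv, implicit $nzcv

    ; After: rewritten to the non-flag-setting variant from the table above.
    %2:gpr64 = ADDSXrr %0, %1, implicit-def $nzcv
    %5:gpr64 = ADCXr %3, %4, implicit $nzcv

The leading ADDSXrr keeps its flag-setting form because its NZCV def is still
consumed by the following instruction.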

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
index f94af34076e79..6158e7d7a8a81 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
index a020e1327fcb8..e04660449b098 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -510,10 +510,14 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -527,10 +531,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -544,10 +552,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -561,10 +573,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -578,10 +594,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -810,7 +830,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -823,7 +845,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -836,7 +860,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -849,7 +875,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -862,7 +890,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1015,10 +1045,13 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -1032,10 +1065,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -1049,10 +1085,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -1066,10 +1105,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -1083,10 +1125,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -1315,7 +1360,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1328,7 +1374,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1341,7 +1388,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1354,7 +1402,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1367,7 +1416,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
index 16bf4c269088c..cb8e7530dedf0 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -555,10 +555,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x2, x0, x9
-; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -570,10 +578,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x2, x0, x9
-; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -585,10 +601,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x2, x0, x9
-; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -600,10 +624,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x2, x0, x9
-; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -615,10 +647,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x2, x0, x9
-; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -845,7 +885,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -858,7 +900,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -871,7 +915,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -884,7 +930,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -897,7 +945,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1130,10 +1180,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x2, x0, x9
-; -O0:    bl __aarch64_cas16_relax
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1145,10 +1202,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x2, x0, x9
-; -O0:    bl __aarch64_cas16_acq
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1160,10 +1224,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x2, x0, x9
-; -O0:    bl __aarch64_cas16_rel
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1175,10 +1246,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x2, x0, x9
-; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1190,10 +1268,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x2, x0, x9
-; -O0:    bl __aarch64_cas16_acq_rel
-; -O0:    subs x10, x10, x11
-; -O0:    ccmp x8, x9, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1440,7 +1525,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1453,7 +1539,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1466,7 +1553,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1479,7 +1567,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1492,7 +1581,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

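The updated -O0 hunks above and below share one shape: the wide half is produced by a
flag-setting adds/subs, and the carry bit is then pulled out of the flags with
"and wN, wM, #0x1". A reduced input of this kind is sketched here (illustrative only;
the function name and llc flags are assumptions, not taken from the test files):

    ; llc -mtriple=aarch64 -global-isel -O0 reduced.ll
    define i128 @reduced_sub(ptr %p, i128 %v) {
      ; the i128 subtract splits into a flag-setting low half plus a
      ; borrow-consuming high half, which is what the checks above match
      %old = atomicrmw sub ptr %p, i128 %v monotonic
      ret i128 %old
    }
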
diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
index 3a150acbf5aa3..987c0640c0f4a 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

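The subs/ccmp pair in the old -O0 checks was a flag-based i128 equality test; the new
eor/eor/orr/subs sequence computes the same predicate value-wise: each 64-bit half is
xor'ed with its expected value, the results are or'ed together, and the final subs
against #0 sets the flags for the success branch. A minimal sketch of the success-bit
consumer these loops implement (illustrative, not from the test files):

    define i1 @success(ptr %p, i128 %old, i128 %new) {
      ; success bit of a 16-byte compare-exchange; at -O0 this is the
      ; eor/eor/orr/subs chain checked above
      %r = cmpxchg ptr %p, i128 %old, i128 %new monotonic monotonic
      %ok = extractvalue { i128, i1 } %r, 1
      ret i1 %ok
    }
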
diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
index 5004db8d26560..25305e056d0b3 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

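In the add loops, the extra "subs wN, wN, #1" after the and-with-#0x1 appears to
rematerialize the carry: with wN holding 0 or 1, subtracting 1 leaves the C flag equal
to the original carry bit (1 - 1 sets C, 0 - 1 borrows and clears it), ready for the
carry-consuming high half. The same carry-out/carry-in structure can be seen in
isolation with an explicit overflow intrinsic (sketch only; the function name is an
assumption):

    declare { i64, i1 } @llvm.uadd.with.overflow.i64(i64, i64)

    define { i64, i1 } @carry_out(i64 %a, i64 %b) {
      ; returns the sum plus the carry-out bit that an i128 add would
      ; feed into its high half
      %r = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
      ret { i64, i1 } %r
    }
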
diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
index 40a055e505901..296dcf4052323 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -545,10 +545,14 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -562,10 +566,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -579,10 +587,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -596,10 +608,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -613,10 +629,14 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x2, x10, x12
+; -O0:    adds x2, x9, x11
+; -O0:    and w11, w9, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -845,7 +865,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -858,7 +880,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -871,7 +895,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -884,7 +910,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -897,7 +925,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1050,10 +1080,13 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    casp x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldp x4, x5, [x0]
@@ -1067,10 +1100,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspa x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldp x4, x5, [x0]
@@ -1084,10 +1120,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspl x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldp x4, x5, [x0]
@@ -1101,10 +1140,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldp x4, x5, [x0]
@@ -1118,10 +1160,13 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x2, x10, x12
+; -O0:    subs x2, x9, x11
+; -O0:    and w11, w9, #0x1
 ; -O0:    caspal x0, x1, x2, x3, [x8]
-; -O0:    subs x11, x8, x11
-; -O0:    ccmp x9, x10, #0, eq
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldp x4, x5, [x0]
@@ -1350,7 +1395,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -1363,7 +1409,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -1376,7 +1423,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -1389,7 +1437,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -1402,7 +1451,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

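With LSE, the same -O0 pattern shows up around casp/caspa/caspl/caspal instead of a
load/store-exclusive loop, but the new carry extraction and eor/orr/subs success check
match the loop-based files above. The signed variants can be exercised the same way
(sketch only, not from the test files; an i128 subtract legalizes to an unsigned low
half plus a signed, borrow-consuming high half):

    declare { i128, i1 } @llvm.ssub.with.overflow.i128(i128, i128)

    define i1 @sub_overflows(i128 %a, i128 %b) {
      %r = call { i128, i1 } @llvm.ssub.with.overflow.i128(i128 %a, i128 %b)
      %o = extractvalue { i128, i1 } %r, 1   ; signed overflow of the i128 subtract
      ret i1 %o
    }
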
diff  --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
index c9e0ff0f5d497..f57d8006f7e8b 100644
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll
@@ -925,14 +925,18 @@ define dso_local i64 @atomicrmw_add_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_monotonic:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -944,14 +948,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acquire:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -963,14 +971,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_release:
-; -O0:    adds x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -982,14 +994,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_acq_rel:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1001,14 +1017,18 @@ define dso_local i128 @atomicrmw_add_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_add_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_aligned_seq_cst:
-; -O0:    adds x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    adds x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    subs w10, w10, #1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_add_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1285,7 +1305,9 @@ define dso_local i64 @atomicrmw_add_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_monotonic:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
@@ -1298,7 +1320,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acquire:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
@@ -1311,7 +1335,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_release:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
@@ -1324,7 +1350,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
@@ -1337,7 +1365,9 @@ define dso_local i128 @atomicrmw_add_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_add_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
-; -O0:    adds x8, x11, x8
+; -O0:    adds x9, x8, x9
+; -O0:    and w11, w8, #0x1
+; -O0:    subs w11, w11, #1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
@@ -1675,14 +1705,17 @@ define dso_local i64 @atomicrmw_sub_i64_aligned_seq_cst(ptr %ptr, i64 %value) {
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_monotonic:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_monotonic:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1694,14 +1727,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_monotonic(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acquire:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stxp w8, x14, x15, [x9]
-; -O0:    stxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stxp w8, x14, x15, [x11]
+; -O0:    stxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acquire:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1713,14 +1749,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acquire(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_release:
-; -O0:    subs x14, x11, x10
-; -O0:    ldxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_release:
 ; -O1:    ldxp x0, x1, [x8]
@@ -1732,14 +1771,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_release(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_acq_rel:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -1751,14 +1793,17 @@ define dso_local i128 @atomicrmw_sub_i128_aligned_acq_rel(ptr %ptr, i128 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_aligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
-; -O0:    subs x14, x11, x10
-; -O0:    ldaxp x10, x12, [x9]
-; -O0:    cmp x10, x11
-; -O0:    cmp x12, x13
-; -O0:    stlxp w8, x14, x15, [x9]
-; -O0:    stlxp w8, x10, x12, [x9]
-; -O0:    subs x12, x12, x13
-; -O0:    ccmp x10, x11, #0, eq
+; -O0:    subs x14, x8, x10
+; -O0:    and w10, w8, #0x1
+; -O0:    ldaxp x10, x9, [x11]
+; -O0:    cmp x10, x12
+; -O0:    cmp x9, x13
+; -O0:    stlxp w8, x14, x15, [x11]
+; -O0:    stlxp w8, x10, x9, [x11]
+; -O0:    eor x8, x10, x8
+; -O0:    eor x11, x9, x11
+; -O0:    orr x8, x8, x11
+; -O0:    subs x8, x8, #0
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_aligned_seq_cst:
 ; -O1:    ldaxp x0, x1, [x8]
@@ -2035,7 +2080,8 @@ define dso_local i64 @atomicrmw_sub_i64_unaligned_seq_cst(ptr %ptr, i64 %value)
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
@@ -2048,7 +2094,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_monotonic(ptr %ptr, i128 %va
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acquire:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
@@ -2061,7 +2108,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acquire(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_release:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
@@ -2074,7 +2122,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_release(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
@@ -2087,7 +2136,8 @@ define dso_local i128 @atomicrmw_sub_i128_unaligned_acq_rel(ptr %ptr, i128 %valu
 
 define dso_local i128 @atomicrmw_sub_i128_unaligned_seq_cst(ptr %ptr, i128 %value) {
 ; -O0-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
-; -O0:    subs x8, x11, x8
+; -O0:    subs x9, x8, x9
+; -O0:    and w11, w8, #0x1
 ; -O0:    bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:

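The MIR diff below does two things: it regenerates the CHECK lines in CHECK-NEXT form,
and it replaces test_impdef_made_dead with test_impdef_subsx, which presumably starts
from a flag-setting 64-bit subtract whose NZCV def is unused and checks that the
subtract reaches the output as a plain SUBXrr. An IR-level situation that produces such
an instruction might look like this (sketch only; the function name is an assumption):

    declare { i64, i1 } @llvm.usub.with.overflow.i64(i64, i64)

    define i64 @sub_no_flags(i64 %a, i64 %b) {
      ; the overflow bit is never read, so the flag def of the selected
      ; flag-setting subtract is dead and can be dropped post-selection
      %r = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
      %v = extractvalue { i64, i1 } %r, 0
      ret i64 %v
    }
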
diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
index 4bb5842a4e5b3..e54a8c076fb61 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs-in-fcmp.mir
@@ -16,19 +16,20 @@ body:             |
 
     ; CHECK-LABEL: name: test_fcmp_dead_cc
     ; CHECK: liveins: $w1, $x0, $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
-    ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
-    ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
-    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
-    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
-    ; CHECK: $w0 = COPY [[CSELWr]]
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %1:gpr64 = COPY $x0
     %2:gpr32 = COPY $w1
     %3:fpr32 = COPY $s0
@@ -60,19 +61,20 @@ body:             |
 
     ; CHECK-LABEL: name: test_fcmp_64_dead_cc
     ; CHECK: liveins: $w1, $x0, $d0, $d1
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
-    ; CHECK: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1
-    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
-    ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
-    ; CHECK: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
-    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
-    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
-    ; CHECK: $w0 = COPY [[CSELWr]]
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %1:gpr64 = COPY $x0
     %2:gpr32 = COPY $w1
     %3:fpr64 = COPY $d0
@@ -104,21 +106,22 @@ body:             |
 
     ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps
     ; CHECK: liveins: $w1, $x0, $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
-    ; CHECK: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
-    ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
-    ; CHECK: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
-    ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31
-    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
-    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
-    ; CHECK: $w0 = COPY [[CSELWr]]
-    ; CHECK: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %1:gpr64 = COPY $x0
     %2:gpr32 = COPY $w1
     %3:fpr32 = COPY $s0
@@ -137,45 +140,226 @@ body:             |
 
 ...
 ---
-name:            test_impdef_made_dead
+name:            test_impdef_subsx
 alignment:       4
 legalized:       true
 regBankSelected: true
 selected:        true
 tracksRegLiveness: true
-liveins:
-  - { reg: '$x0' }
-  - { reg: '$w1' }
 body:             |
   bb.1:
-    liveins: $w1, $x0, $s0, $s1
-    ; Check that any dead imp-defs of NZCV are marked as such.
-    ; CHECK-LABEL: name: test_impdef_made_dead
-    ; CHECK: liveins: $w1, $x0, $s0, $s1
-    ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
-    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
-    ; CHECK: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
-    ; CHECK: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
-    ; CHECK: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
-    ; CHECK: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY1]], [[COPY4]], implicit-def dead $nzcv
-    ; CHECK: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
-    ; CHECK: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBSWrr]], 1, 31
-    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
-    ; CHECK: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
-    ; CHECK: %ret:gpr32 = SUBSWrr [[CSELWr]], [[SUBSWrr]], implicit-def dead $nzcv
-    ; CHECK: $w0 = COPY [[CSELWr]]
-    ; CHECK: RET_ReallyLR implicit $w0
+    liveins: $x0, $x1
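+    ; the NZCV def is unused, so the flag-setting SUBS is replaced by a plain SUB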
+    ; CHECK-LABEL: name: test_impdef_subsx
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $x0 = COPY [[SUBXrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %1:gpr64 = COPY $x0
-    %2:gpr32 = COPY $w1
-    %3:fpr32 = COPY $s0
-    %4:fpr32 = COPY $s1
-    %26:gpr32 = COPY $wzr
-    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
-    FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
-    %14:gpr32common = UBFMWri %12, 1, 31
-    %60:gpr32 = MOVi32imm 1
-    %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
-    %ret:gpr32 = SUBSWrr %16, %12, implicit-def $nzcv
-    $w0 = COPY %16
+    %2:gpr64 = COPY $x1
+    %4:gpr64 = SUBSXrr %1, %2, implicit-def $nzcv
+    $x0 = COPY %4
+    RET_ReallyLR implicit $x0
+...
+---
+name:            test_impdef_subsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: test_impdef_subsw
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $w0 = COPY [[SUBWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %4:gpr32 = SUBSWrr %1, %2, implicit-def $nzcv
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+...
+---
+name:            test_impdef_addsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_impdef_addsx
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $x0 = COPY [[ADDXrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %4:gpr64 = ADDSXrr %1, %2, implicit-def $nzcv
+    $x0 = COPY %4
+    RET_ReallyLR implicit $x0
+...
+---
+name:            test_impdef_addsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: test_impdef_addsw
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %4:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv
+    $w0 = COPY %4
     RET_ReallyLR implicit $w0
 ...
+---
+name:            test_impdef_adcsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: test_impdef_adcsx
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCXr:%[0-9]+]]:gpr64 = ADCXr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %3:gpr64 = COPY $x2
+    %4:gpr64 = COPY $x3
+    %5:gpr64 = ADDSXrr %1, %3, implicit-def $nzcv
+    %6:gpr64 = ADCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $x0 = COPY %5
+    $x1 = COPY %6
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name:            test_impdef_adcsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1, $w2, $w3
+    ; CHECK-LABEL: name: test_impdef_adcsw
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+    ; CHECK-NEXT: [[ADDSWrr:%[0-9]+]]:gpr32 = ADDSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCWr:%[0-9]+]]:gpr32 = ADCWr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[ADDSWrr]]
+    ; CHECK-NEXT: $w1 = COPY [[ADCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %3:gpr32 = COPY $w2
+    %4:gpr32 = COPY $w3
+    %5:gpr32 = ADDSWrr %1, %3, implicit-def $nzcv
+    %6:gpr32 = ADCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $w0 = COPY %5
+    $w1 = COPY %6
+    RET_ReallyLR implicit $w0, implicit $w1
+...
+---
+name:            test_impdef_sbcsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: test_impdef_sbcsx
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCXr:%[0-9]+]]:gpr64 = SBCXr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %3:gpr64 = COPY $x2
+    %4:gpr64 = COPY $x3
+    %5:gpr64 = SUBSXrr %1, %3, implicit-def $nzcv
+    %6:gpr64 = SBCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $x0 = COPY %5
+    $x1 = COPY %6
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name:            test_impdef_sbcsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1, $w2, $w3
+    ; CHECK-LABEL: name: test_impdef_sbcsw
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCWr:%[0-9]+]]:gpr32 = SBCWr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]]
+    ; CHECK-NEXT: $w1 = COPY [[SBCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %3:gpr32 = COPY $w2
+    %4:gpr32 = COPY $w3
+    %5:gpr32 = SUBSWrr %1, %3, implicit-def $nzcv
+    %6:gpr32 = SBCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $w0 = COPY %5
+    $w1 = COPY %6
+    RET_ReallyLR implicit $w0, implicit $w1
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
new file mode 100644
index 0000000000000..e54a8c076fb61
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postselectopt-dead-cc-defs.mir
@@ -0,0 +1,365 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-post-select-optimize -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:            test_fcmp_dead_cc
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+body:             |
+  bb.1:
+    liveins: $w1, $x0, $s0, $s1
+
+    ; CHECK-LABEL: name: test_fcmp_dead_cc
+    ; CHECK: liveins: $w1, $x0, $s0, $s1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr64 = COPY $x0
+    %2:gpr32 = COPY $w1
+    %3:fpr32 = COPY $s0
+    %4:fpr32 = COPY $s1
+    %26:gpr32 = COPY $wzr
+    FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+    FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %14:gpr32common = UBFMWri %12, 1, 31
+    %60:gpr32 = MOVi32imm 1
+    %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
+    $w0 = COPY %16
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_fcmp_64_dead_cc
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+body:             |
+  bb.1:
+    liveins: $w1, $x0, $d0, $d1
+
+    ; CHECK-LABEL: name: test_fcmp_64_dead_cc
+    ; CHECK: liveins: $w1, $x0, $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr64 = COPY $d0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY $d1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPDrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr]], 1, 31
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr64 = COPY $x0
+    %2:gpr32 = COPY $w1
+    %3:fpr64 = COPY $d0
+    %4:fpr64 = COPY $d1
+    %26:gpr32 = COPY $wzr
+    FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+    FCMPDrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %14:gpr32common = UBFMWri %12, 1, 31
+    %60:gpr32 = MOVi32imm 1
+    %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
+    $w0 = COPY %16
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_fcmp_dead_cc_3_fcmps
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x0' }
+  - { reg: '$w1' }
+body:             |
+  bb.1:
+    liveins: $w1, $x0, $s0, $s1
+
+    ; CHECK-LABEL: name: test_fcmp_dead_cc_3_fcmps
+    ; CHECK: liveins: $w1, $x0, $s0, $s1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr32 = COPY $s0
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr32 = COPY $s1
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr32 = COPY $wzr
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def dead $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[SUBWrr1:%[0-9]+]]:gpr32 = SUBWrr [[COPY1]], [[COPY4]]
+    ; CHECK-NEXT: FCMPSrr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $fpcr
+    ; CHECK-NEXT: [[UBFMWri:%[0-9]+]]:gpr32common = UBFMWri [[SUBWrr1]], 1, 31
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[CSELWr:%[0-9]+]]:gpr32 = CSELWr [[UBFMWri]], [[MOVi32imm]], 8, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[CSELWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr64 = COPY $x0
+    %2:gpr32 = COPY $w1
+    %3:fpr32 = COPY $s0
+    %4:fpr32 = COPY $s1
+    %26:gpr32 = COPY $wzr
+    FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+    FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %12:gpr32 = SUBSWrr %2, %26, implicit-def $nzcv
+    FCMPSrr %3, %4, implicit-def $nzcv, implicit $fpcr
+    %14:gpr32common = UBFMWri %12, 1, 31
+    %60:gpr32 = MOVi32imm 1
+    %16:gpr32 = CSELWr %14, %60, 8, implicit $nzcv
+    $w0 = COPY %16
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_impdef_subsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
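+    ; the NZCV def is unused, so the flag-setting SUBS is replaced by a plain SUB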
+    ; CHECK-LABEL: name: test_impdef_subsx
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[SUBXrr:%[0-9]+]]:gpr64 = SUBXrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $x0 = COPY [[SUBXrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %4:gpr64 = SUBSXrr %1, %2, implicit-def $nzcv
+    $x0 = COPY %4
+    RET_ReallyLR implicit $x0
+...
+---
+name:            test_impdef_subsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: test_impdef_subsw
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[SUBWrr:%[0-9]+]]:gpr32 = SUBWrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $w0 = COPY [[SUBWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %4:gpr32 = SUBSWrr %1, %2, implicit-def $nzcv
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+...
+---
+name:            test_impdef_addsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: test_impdef_addsx
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $x0 = COPY [[ADDXrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %4:gpr64 = ADDSXrr %1, %2, implicit-def $nzcv
+    $x0 = COPY %4
+    RET_ReallyLR implicit $x0
+...
+---
+name:            test_impdef_addsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: test_impdef_addsw
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[ADDWrr:%[0-9]+]]:gpr32 = ADDWrr [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $w0 = COPY [[ADDWrr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %4:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+...
+---
+name:            test_impdef_adcsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: test_impdef_adcsx
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCXr:%[0-9]+]]:gpr64 = ADCXr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %3:gpr64 = COPY $x2
+    %4:gpr64 = COPY $x3
+    %5:gpr64 = ADDSXrr %1, %3, implicit-def $nzcv
+    %6:gpr64 = ADCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $x0 = COPY %5
+    $x1 = COPY %6
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name:            test_impdef_adcsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1, $w2, $w3
+    ; CHECK-LABEL: name: test_impdef_adcsw
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+    ; CHECK-NEXT: [[ADDSWrr:%[0-9]+]]:gpr32 = ADDSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCWr:%[0-9]+]]:gpr32 = ADCWr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[ADDSWrr]]
+    ; CHECK-NEXT: $w1 = COPY [[ADCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %3:gpr32 = COPY $w2
+    %4:gpr32 = COPY $w3
+    %5:gpr32 = ADDSWrr %1, %3, implicit-def $nzcv
+    %6:gpr32 = ADCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $w0 = COPY %5
+    $w1 = COPY %6
+    RET_ReallyLR implicit $w0, implicit $w1
+...
+---
+name:            test_impdef_sbcsx
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+    ; CHECK-LABEL: name: test_impdef_sbcsx
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCXr:%[0-9]+]]:gpr64 = SBCXr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %1:gpr64 = COPY $x0
+    %2:gpr64 = COPY $x1
+    %3:gpr64 = COPY $x2
+    %4:gpr64 = COPY $x3
+    %5:gpr64 = SUBSXrr %1, %3, implicit-def $nzcv
+    %6:gpr64 = SBCSXr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $x0 = COPY %5
+    $x1 = COPY %6
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+---
+name:            test_impdef_sbcsw
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1, $w2, $w3
+    ; CHECK-LABEL: name: test_impdef_sbcsw
+    ; CHECK: liveins: $w0, $w1, $w2, $w3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCWr:%[0-9]+]]:gpr32 = SBCWr [[COPY1]], [[COPY3]], implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]]
+    ; CHECK-NEXT: $w1 = COPY [[SBCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %1:gpr32 = COPY $w0
+    %2:gpr32 = COPY $w1
+    %3:gpr32 = COPY $w2
+    %4:gpr32 = COPY $w3
+    %5:gpr32 = SUBSWrr %1, %3, implicit-def $nzcv
+    %6:gpr32 = SBCSWr %2, %4, implicit-def $nzcv, implicit $nzcv
+    $w0 = COPY %5
+    $w1 = COPY %6
+    RET_ReallyLR implicit $w0, implicit $w1
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
new file mode 100644
index 0000000000000..85625ced4ba69
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-sadde.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name:            sadde_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: sadde_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s32) = G_CONSTANT i32 1
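+    ; the s32 carry-in is moved into the C flag (SUBS %2, #1) before the ADCS consumes it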
+    %3:gpr(s64), %4:gpr(s32) = G_SADDE %0, %1, %2
+    $x0 = COPY %3(s64)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name:            sadde_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: sadde_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32 = ADCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[ADCSWr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32), %4:gpr(s32) = G_SADDE %0, %1, %2
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name:            sadde_opt_prev_uaddo
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: sadde_opt_prev_uaddo
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %8:gpr(s64), %12:gpr(s32) = G_UADDO %0, %2
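+    ; the carry-in is produced by the preceding G_UADDO, so its NZCV result is used directly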
+    %9:gpr(s64), %13:gpr(s32) = G_SADDE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            sadde_opt_prev_uadde
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: sadde_opt_prev_uadde
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %8:gpr(s64), %12:gpr(s32) = G_UADDE %0, %2, %6
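+    ; the carry-in is produced by the preceding G_UADDE, so no rematerialization is needed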
+    %9:gpr(s64), %13:gpr(s32) = G_SADDE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            sadde_opt_bail_clobber
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+    ; CHECK-LABEL: name: sadde_opt_bail_clobber
+    ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[CSINCWr]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: $x2 = COPY [[ADCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %4:gpr(s64) = COPY $x4
+    %5:gpr(s64) = COPY $x5
+    %6:gpr(s64) = COPY $x6
+    %7:gpr(s64), %11:gpr(s32) = G_UADDO %0, %4
+    %8:gpr(s64), %12:gpr(s32) = G_UADDE %1, %5, %11
+    ; the carry-in is not produced by the previous instruction, so it is rematerialized into NZCV first
+    %9:gpr(s64), %13:gpr(s32) = G_SADDE %2, %6, %11
+    $x0 = COPY %7(s64)
+    $x1 = COPY %8(s64)
+    $x2 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
index 8742c7c68458b..42017e96e5a63 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-saddo.mir
@@ -74,7 +74,6 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
@@ -101,7 +100,6 @@ body:             |
     ; CHECK-NEXT: %copy1:gpr32 = COPY $w0
     ; CHECK-NEXT: %copy2:gpr32 = COPY $w1
     ; CHECK-NEXT: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy1:gpr(s32) = COPY $w0
@@ -129,7 +127,6 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
@@ -155,7 +152,6 @@ body:             |
     ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
     ; CHECK-NEXT: %reg1:gpr32 = COPY $w0
     ; CHECK-NEXT: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
-    ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $x0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %reg0:gpr(s64) = COPY $x0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
new file mode 100644
index 0000000000000..00bd26cc0220d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssube.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name:            ssube_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: ssube_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s32) = G_CONSTANT i32 1
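+    ; the s32 carry-in is inverted into the C flag (SUBS $wzr, %2) before the SBCS consumes it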
+    %3:gpr(s64), %4:gpr(s32) = G_SSUBE %0, %1, %2
+    $x0 = COPY %3(s64)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name:            ssube_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: ssube_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32 = SBCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[SBCSWr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32), %4:gpr(s32) = G_SSUBE %0, %1, %2
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name:            ssube_opt_prev_usubo
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: ssube_opt_prev_usubo
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %8:gpr(s64), %12:gpr(s32) = G_USUBO %0, %2
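+    ; the carry-in is produced by the preceding G_USUBO, so its NZCV result is used directly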
+    %9:gpr(s64), %13:gpr(s32) = G_SSUBE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            ssube_opt_prev_usube
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: ssube_opt_prev_usube
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %8:gpr(s64), %12:gpr(s32) = G_USUBE %0, %2, %6
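+    ; the carry-in is produced by the preceding G_USUBE, so no rematerialization is needed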
+    %9:gpr(s64), %13:gpr(s32) = G_SSUBE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            ssube_opt_bail_clobber
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+    ; CHECK-LABEL: name: ssube_opt_bail_clobber
+    ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[CSINCWr]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: $x2 = COPY [[SBCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %4:gpr(s64) = COPY $x4
+    %5:gpr(s64) = COPY $x5
+    %6:gpr(s64) = COPY $x6
+    %7:gpr(s64), %11:gpr(s32) = G_USUBO %0, %4
+    %8:gpr(s64), %12:gpr(s32) = G_USUBE %1, %5, %11
+    ; the carry-in is not produced by the previous instruction, so it is rematerialized into NZCV first
+    %9:gpr(s64), %13:gpr(s32) = G_SSUBE %2, %6, %11
+    $x0 = COPY %7(s64)
+    $x1 = COPY %8(s64)
+    $x2 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
index 9055dd20d312c..5a47a4515930c 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-ssubo.mir
@@ -74,7 +74,6 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
@@ -101,7 +100,6 @@ body:             |
     ; CHECK-NEXT: %copy1:gpr32 = COPY $w0
     ; CHECK-NEXT: %copy2:gpr32 = COPY $w1
     ; CHECK-NEXT: %add:gpr32 = SUBSWrs %copy1, %copy2, 16, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy1:gpr(s32) = COPY $w0
@@ -129,7 +127,6 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
@@ -155,7 +152,6 @@ body:             |
     ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
     ; CHECK-NEXT: %reg1:gpr32 = COPY $w0
     ; CHECK-NEXT: %add:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
-    ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 7, implicit $nzcv
     ; CHECK-NEXT: $x0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %reg0:gpr(s64) = COPY $x0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
new file mode 100644
index 0000000000000..dc80d0c9abc25
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uadde.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name:            uadde_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: uadde_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s32) = G_CONSTANT i32 1
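+    ; the s32 carry-in is moved into the C flag (SUBS %2, #1) before the ADCS consumes it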
+    %3:gpr(s64), %4:gpr(s32) = G_UADDE %0, %1, %2
+    $x0 = COPY %3(s64)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name:            uadde_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: uadde_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32 = ADCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[ADCSWr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32), %4:gpr(s32) = G_UADDE %0, %1, %2
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name:            uadde_opt_prev_uaddo
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: uadde_opt_prev_uaddo
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %8:gpr(s64), %12:gpr(s32) = G_UADDO %0, %2
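+    ; the carry-in is produced by the preceding G_UADDO, so its NZCV result is used directly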
+    %9:gpr(s64), %13:gpr(s32) = G_UADDE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            uadde_opt_prev_uadde
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: uadde_opt_prev_uadde
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32common = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[MOVi32imm]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %8:gpr(s64), %12:gpr(s32) = G_UADDE %0, %2, %6
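+    ; the carry-in is produced by the preceding G_UADDE, so no rematerialization is needed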
+    %9:gpr(s64), %13:gpr(s32) = G_UADDE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            uadde_opt_bail_clobber
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+    ; CHECK-LABEL: name: uadde_opt_bail_clobber
+    ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+    ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32common = CSINCWr $wzr, $wzr, 3, implicit $nzcv
+    ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64 = ADCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[CSINCWr]], 1, 0, implicit-def $nzcv
+    ; CHECK-NEXT: [[ADCSXr1:%[0-9]+]]:gpr64 = ADCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[ADDSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[ADCSXr]]
+    ; CHECK-NEXT: $x2 = COPY [[ADCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %4:gpr(s64) = COPY $x4
+    %5:gpr(s64) = COPY $x5
+    %6:gpr(s64) = COPY $x6
+    %7:gpr(s64), %11:gpr(s32) = G_UADDO %0, %4
+    %8:gpr(s64), %12:gpr(s32) = G_UADDE %1, %5, %11
+    ; the carry-in is not produced by the previous instruction, so it is rematerialized into NZCV first
+    %9:gpr(s64), %13:gpr(s32) = G_UADDE %2, %6, %11
+    $x0 = COPY %7(s64)
+    $x1 = COPY %8(s64)
+    $x2 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
index c325eb809eca2..55984b116e1ca 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-uaddo.mir
@@ -74,7 +74,6 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
@@ -101,7 +100,6 @@ body:             |
     ; CHECK-NEXT: %copy1:gpr32 = COPY $w0
     ; CHECK-NEXT: %copy2:gpr32 = COPY $w1
     ; CHECK-NEXT: %add:gpr32 = ADDSWrs %copy1, %copy2, 16, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy1:gpr(s32) = COPY $w0
@@ -129,7 +127,6 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
@@ -155,7 +152,6 @@ body:             |
     ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
     ; CHECK-NEXT: %reg1:gpr32 = COPY $w0
     ; CHECK-NEXT: %add:gpr64 = ADDSXrx %reg0, %reg1, 18, implicit-def $nzcv
-    ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 3, implicit $nzcv
     ; CHECK-NEXT: $x0 = COPY %add
     ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %reg0:gpr(s64) = COPY $x0

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
new file mode 100644
index 0000000000000..c532474fc67b4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usube.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel-abort=1 -run-pass=instruction-select %s -o - | FileCheck %s
+...
+---
+name:            usube_s64
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: usube_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s32) = G_CONSTANT i32 1
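+    ; the s32 carry-in is inverted into the C flag (SUBS $wzr, %2) before the SBCS consumes it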
+    %3:gpr(s64), %4:gpr(s32) = G_USUBE %0, %1, %2
+    $x0 = COPY %3(s64)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $x0, implicit $w1
+...
+...
+---
+name:            usube_s32
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $w0, $w1
+
+    ; CHECK-LABEL: name: usube_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32 = SBCSWr [[COPY]], [[COPY1]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+    ; CHECK-NEXT: $w0 = COPY [[SBCSWr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    %0:gpr(s32) = COPY $w0
+    %1:gpr(s32) = COPY $w1
+    %2:gpr(s32) = G_CONSTANT i32 1
+    %3:gpr(s32), %4:gpr(s32) = G_USUBE %0, %1, %2
+    $w0 = COPY %3(s32)
+    $w1 = COPY %4(s32)
+    RET_ReallyLR implicit $w0, implicit $w1
+...
+...
+---
+name:            usube_opt_prev_usubo
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: usube_opt_prev_usubo
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY2]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %8:gpr(s64), %12:gpr(s32) = G_USUBO %0, %2
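+    ; the carry-in is produced by the preceding G_USUBO, so its NZCV result is used directly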
+    %9:gpr(s64), %13:gpr(s32) = G_USUBE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            usube_opt_prev_usube
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x3
+
+    ; CHECK-LABEL: name: usube_opt_prev_usube
+    ; CHECK: liveins: $x0, $x1, $x2, $x3
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3
+    ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 1
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[MOVi32imm]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY]], [[COPY2]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY3]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %3:gpr(s64) = COPY $x3
+    %6:gpr(s32) = G_CONSTANT i32 1
+    %8:gpr(s64), %12:gpr(s32) = G_USUBE %0, %2, %6
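+    ; the carry-in is produced by the preceding G_USUBE, so no rematerialization is needed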
+    %9:gpr(s64), %13:gpr(s32) = G_USUBE %1, %3, %12
+    $x0 = COPY %8(s64)
+    $x1 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1
+...
+...
+---
+name:            usube_opt_bail_clobber
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1:
+    liveins: $x0, $x1, $x2, $x4, $x5, $x6
+
+    ; CHECK-LABEL: name: usube_opt_bail_clobber
+    ; CHECK: liveins: $x0, $x1, $x2, $x4, $x5, $x6
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x4
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr64 = COPY $x5
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr64 = COPY $x6
+    ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY3]], implicit-def $nzcv
+    ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
+    ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64 = SBCSXr [[COPY1]], [[COPY4]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr $wzr, [[CSINCWr]], implicit-def $nzcv
+    ; CHECK-NEXT: [[SBCSXr1:%[0-9]+]]:gpr64 = SBCSXr [[COPY2]], [[COPY5]], implicit-def $nzcv, implicit $nzcv
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $x1 = COPY [[SBCSXr]]
+    ; CHECK-NEXT: $x2 = COPY [[SBCSXr1]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+    %0:gpr(s64) = COPY $x0
+    %1:gpr(s64) = COPY $x1
+    %2:gpr(s64) = COPY $x2
+    %4:gpr(s64) = COPY $x4
+    %5:gpr(s64) = COPY $x5
+    %6:gpr(s64) = COPY $x6
+    %7:gpr(s64), %11:gpr(s32) = G_USUBO %0, %4
+    %8:gpr(s64), %12:gpr(s32) = G_USUBE %1, %5, %11
+    ; carry-in is not produced by the previous instruction
+    %9:gpr(s64), %13:gpr(s32) = G_USUBE %2, %6, %11
+    $x0 = COPY %7(s64)
+    $x1 = COPY %8(s64)
+    $x2 = COPY %9(s64)
+    RET_ReallyLR implicit $x0, implicit $x1, implicit $x2
+...

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
index 45e02b40812d1..1fad9d41aa404 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-usubo.mir
@@ -19,13 +19,15 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
     ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: $w0 = COPY [[SUBSWrr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
     %0:gpr(s32) = COPY $w0
     %1:gpr(s32) = COPY $w1
     %3:gpr(s32), %4:gpr(s32) = G_USUBO %0, %1
-    $w0 = COPY %4(s32)
-    RET_ReallyLR implicit $w0
+    $w0 = COPY %3
+    $w1 = COPY %4
+    RET_ReallyLR implicit $w0, implicit $w1
 
 ...
 ---
@@ -45,13 +47,15 @@ body:             |
     ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
     ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv
     ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
-    ; CHECK-NEXT: $w0 = COPY [[CSINCWr]]
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    ; CHECK-NEXT: $x0 = COPY [[SUBSXrr]]
+    ; CHECK-NEXT: $w1 = COPY [[CSINCWr]]
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
     %0:gpr(s64) = COPY $x0
     %1:gpr(s64) = COPY $x1
     %3:gpr(s64), %4:gpr(s32) = G_USUBO %0, %1
-    $w0 = COPY %4
-    RET_ReallyLR implicit $w0
+    $x0 = COPY %3
+    $w1 = COPY %4
+    RET_ReallyLR implicit $x0, implicit $w1
 
 ...
 ---
@@ -70,16 +74,13 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = SUBSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
-    ; CHECK-NEXT: $w1 = COPY %overflow
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
     %constant:gpr(s32) = G_CONSTANT i32 16
     %add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy, %constant
     $w0 = COPY %add
-    $w1 = COPY %overflow
-    RET_ReallyLR implicit $w0, implicit $w1
+    RET_ReallyLR implicit $w0
 
 ...
 ---
@@ -99,18 +100,15 @@ body:             |
     ; CHECK-NEXT: %copy1:gpr32 = COPY $w0
     ; CHECK-NEXT: %copy2:gpr32 = COPY $w1
     ; CHECK-NEXT: %add:gpr32 = SUBSWrs %copy1, %copy2, 16, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
-    ; CHECK-NEXT: $w1 = COPY %overflow
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy1:gpr(s32) = COPY $w0
     %copy2:gpr(s32) = COPY $w1
     %constant:gpr(s32) = G_CONSTANT i32 16
     %shift:gpr(s32) = G_SHL %copy2(s32), %constant(s32)
     %add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy1, %shift
     $w0 = COPY %add(s32)
-    $w1 = COPY %overflow(s32)
-    RET_ReallyLR implicit $w0, implicit $w1
+    RET_ReallyLR implicit $w0
 
 ...
 ---
@@ -129,16 +127,13 @@ body:             |
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: %copy:gpr32sp = COPY $w0
     ; CHECK-NEXT: %add:gpr32 = ADDSWri %copy, 16, 0, implicit-def $nzcv
-    ; CHECK-NEXT: %overflow:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
     ; CHECK-NEXT: $w0 = COPY %add
-    ; CHECK-NEXT: $w1 = COPY %overflow
-    ; CHECK-NEXT: RET_ReallyLR implicit $w0, implicit $w1
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
     %copy:gpr(s32) = COPY $w0
     %constant:gpr(s32) = G_CONSTANT i32 -16
     %add:gpr(s32), %overflow:gpr(s32) = G_USUBO %copy, %constant
     $w0 = COPY %add(s32)
-    $w1 = COPY %overflow(s32)
-    RET_ReallyLR implicit $w0, implicit $w1
+    RET_ReallyLR implicit $w0
 
 ...
 ---
@@ -157,10 +152,8 @@ body:             |
     ; CHECK-NEXT: %reg0:gpr64sp = COPY $x0
     ; CHECK-NEXT: %reg1:gpr32 = COPY $w0
     ; CHECK-NEXT: %add:gpr64 = SUBSXrx %reg0, %reg1, 18, implicit-def $nzcv
-    ; CHECK-NEXT: %flags:gpr32 = CSINCWr $wzr, $wzr, 2, implicit $nzcv
     ; CHECK-NEXT: $x0 = COPY %add
-    ; CHECK-NEXT: $w1 = COPY %flags
-    ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $w1
+    ; CHECK-NEXT: RET_ReallyLR implicit $x0
     %reg0:gpr(s64) = COPY $x0
     %reg1:gpr(s32) = COPY $w0
     %ext:gpr(s64) = G_ZEXT %reg1(s32)
@@ -168,5 +161,4 @@ body:             |
     %shift:gpr(s64) = G_SHL %ext, %cst(s64)
     %add:gpr(s64), %flags:gpr(s32) = G_USUBO %reg0, %shift
     $x0 = COPY %add(s64)
-    $w1 = COPY %flags
-    RET_ReallyLR implicit $x0, implicit $w1
+    RET_ReallyLR implicit $x0

diff --git a/llvm/test/CodeGen/AArch64/popcount.ll b/llvm/test/CodeGen/AArch64/popcount.ll
index 1dd282a142193..0a3ee98f843c8 100644
--- a/llvm/test/CodeGen/AArch64/popcount.ll
+++ b/llvm/test/CodeGen/AArch64/popcount.ll
@@ -57,7 +57,6 @@ define i16 @popcount256(ptr nocapture nonnull readonly %0) {
 ; CHECK-NEXT:    // kill: def $x10 killed $w10
 ; CHECK-NEXT:    bfi x8, x10, #32, #32
 ; CHECK-NEXT:    adds x8, x8, x9
-; CHECK-NEXT:    cset w9, hs
 ; CHECK-NEXT:    mov w0, w8
 ; CHECK-NEXT:    ret
 Entry:


        

